forked from Minki/linux
net/netfilter: Add unstable CT lookup helpers for XDP and TC-BPF
This change adds conntrack lookup helpers using the unstable kfunc call interface for the XDP and TC-BPF hooks. The primary use case is implementing a synproxy in XDP, see Maxim's patchset [0]. Export get_net_ns_by_id as nf_conntrack_bpf.c needs to call it. This object is only built when BTF is generated for conntrack: CONFIG_DEBUG_INFO_BTF when nf_conntrack is built in, or CONFIG_DEBUG_INFO_BTF_MODULES when it is built as a module. [0]: https://lore.kernel.org/bpf/20211019144655.3483197-1-maximmi@nvidia.com Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com> Link: https://lore.kernel.org/r/20220114163953.1455836-7-memxor@gmail.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
5c073f26f9
commit
b4c2b9593a
23
include/net/netfilter/nf_conntrack_bpf.h
Normal file
23
include/net/netfilter/nf_conntrack_bpf.h
Normal file
@ -0,0 +1,23 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef _NF_CONNTRACK_BPF_H
|
||||
#define _NF_CONNTRACK_BPF_H
|
||||
|
||||
#include <linux/btf.h>
|
||||
#include <linux/kconfig.h>
|
||||
|
||||
#if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
|
||||
(IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
|
||||
|
||||
extern int register_nf_conntrack_bpf(void);
|
||||
|
||||
#else
|
||||
|
||||
/* Fallback used when the #if condition above is false: nothing is registered and 0 (success) is returned. */
static inline int register_nf_conntrack_bpf(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _NF_CONNTRACK_BPF_H */
|
@ -299,6 +299,7 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
|
||||
|
||||
return peer;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
|
||||
|
||||
/*
|
||||
* setup_net runs the initializers for the network namespace object.
|
||||
|
@ -14,6 +14,11 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
|
||||
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
|
||||
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
|
||||
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
|
||||
ifeq ($(CONFIG_NF_CONNTRACK),m)
|
||||
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o
|
||||
else ifeq ($(CONFIG_NF_CONNTRACK),y)
|
||||
nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
|
||||
endif
|
||||
|
||||
obj-$(CONFIG_NETFILTER) = netfilter.o
|
||||
|
||||
|
257
net/netfilter/nf_conntrack_bpf.c
Normal file
257
net/netfilter/nf_conntrack_bpf.c
Normal file
@ -0,0 +1,257 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Unstable Conntrack Helpers for XDP and TC-BPF hook
|
||||
*
|
||||
* These are called from the XDP and SCHED_CLS BPF programs. Note that it is
|
||||
* allowed to break compatibility for these functions since the interface they
|
||||
* are exposed through to BPF programs is explicitly unstable.
|
||||
*/
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/net_namespace.h>
|
||||
#include <net/netfilter/nf_conntrack.h>
|
||||
#include <net/netfilter/nf_conntrack_core.h>
|
||||
|
||||
/* bpf_ct_opts - Options for CT lookup helpers
|
||||
*
|
||||
* Members:
|
||||
* @netns_id - Specify the network namespace for lookup
|
||||
* Values:
|
||||
* BPF_F_CURRENT_NETNS (-1)
|
||||
* Use namespace associated with ctx (xdp_md, __sk_buff)
|
||||
* [0, S32_MAX]
|
||||
* Network Namespace ID
|
||||
* @error - Out parameter, set for any errors encountered
|
||||
* Values:
|
||||
* -EINVAL - Passed NULL for bpf_tuple pointer
|
||||
* -EINVAL - opts->reserved is not 0
|
||||
* -EINVAL - netns_id is less than -1
|
||||
* -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
|
||||
* -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
|
||||
* -ENONET - No network namespace found for netns_id
|
||||
* -ENOENT - Conntrack lookup could not find entry for tuple
|
||||
* -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
|
||||
* or sizeof(tuple->ipv6)
|
||||
* @l4proto - Layer 4 protocol
|
||||
* Values:
|
||||
* IPPROTO_TCP, IPPROTO_UDP
|
||||
* @reserved - Reserved member, will be reused for more options in future
|
||||
* Values:
|
||||
* 0
|
||||
*/
|
||||
/* Options passed to the CT lookup helpers; sizeof() must equal NF_BPF_CT_OPTS_SZ (checked with BUILD_BUG_ON in the helpers). */
struct bpf_ct_opts {
|
||||
s32 netns_id; /* BPF_F_CURRENT_NETNS or a namespace id >= 0; values below BPF_F_CURRENT_NETNS yield -EINVAL */
|
||||
s32 error; /* written by the lookup helpers when they return NULL for a non-NULL opts */
|
||||
u8 l4proto; /* IPPROTO_TCP or IPPROTO_UDP, anything else yields -EPROTO */
|
||||
u8 reserved[3]; /* every byte must be 0, otherwise the lookup helpers report -EINVAL */
|
||||
};
|
||||
|
||||
/* Expected size of struct bpf_ct_opts; the lookup helpers compare opts__sz against it. */
enum {
|
||||
NF_BPF_CT_OPTS_SZ = 12,
|
||||
};
|
||||
|
||||
static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
|
||||
struct bpf_sock_tuple *bpf_tuple,
|
||||
u32 tuple_len, u8 protonum,
|
||||
s32 netns_id)
|
||||
{
|
||||
struct nf_conntrack_tuple_hash *hash;
|
||||
struct nf_conntrack_tuple tuple;
|
||||
|
||||
if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
|
||||
return ERR_PTR(-EPROTO);
|
||||
if (unlikely(netns_id < BPF_F_CURRENT_NETNS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
memset(&tuple, 0, sizeof(tuple));
|
||||
switch (tuple_len) {
|
||||
case sizeof(bpf_tuple->ipv4):
|
||||
tuple.src.l3num = AF_INET;
|
||||
tuple.src.u3.ip = bpf_tuple->ipv4.saddr;
|
||||
tuple.src.u.tcp.port = bpf_tuple->ipv4.sport;
|
||||
tuple.dst.u3.ip = bpf_tuple->ipv4.daddr;
|
||||
tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport;
|
||||
break;
|
||||
case sizeof(bpf_tuple->ipv6):
|
||||
tuple.src.l3num = AF_INET6;
|
||||
memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
|
||||
tuple.src.u.tcp.port = bpf_tuple->ipv6.sport;
|
||||
memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
|
||||
tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport;
|
||||
break;
|
||||
default:
|
||||
return ERR_PTR(-EAFNOSUPPORT);
|
||||
}
|
||||
|
||||
tuple.dst.protonum = protonum;
|
||||
|
||||
if (netns_id >= 0) {
|
||||
net = get_net_ns_by_id(net, netns_id);
|
||||
if (unlikely(!net))
|
||||
return ERR_PTR(-ENONET);
|
||||
}
|
||||
|
||||
hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
|
||||
if (netns_id >= 0)
|
||||
put_net(net);
|
||||
if (!hash)
|
||||
return ERR_PTR(-ENOENT);
|
||||
return nf_ct_tuplehash_to_ctrack(hash);
|
||||
}
|
||||
|
||||
__diag_push();
|
||||
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
|
||||
"Global functions as their definitions will be in nf_conntrack BTF");
|
||||
|
||||
/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
|
||||
* reference to it
|
||||
*
|
||||
* Parameters:
|
||||
* @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
|
||||
* Cannot be NULL
|
||||
* @bpf_tuple - Pointer to memory representing the tuple to look up
|
||||
* Cannot be NULL
|
||||
* @tuple__sz - Length of the tuple structure
|
||||
* Must be one of sizeof(bpf_tuple->ipv4) or
|
||||
* sizeof(bpf_tuple->ipv6)
|
||||
* @opts - Additional options for lookup (documented above)
|
||||
* Cannot be NULL
|
||||
* @opts__sz - Length of the bpf_ct_opts structure
|
||||
* Must be NF_BPF_CT_OPTS_SZ (12)
|
||||
*/
|
||||
struct nf_conn *
|
||||
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
|
||||
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
|
||||
{
|
||||
struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
|
||||
struct net *caller_net;
|
||||
struct nf_conn *nfct;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
|
||||
|
||||
if (!opts)
|
||||
return NULL;
|
||||
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
|
||||
opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
|
||||
opts->error = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
caller_net = dev_net(ctx->rxq->dev);
|
||||
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
|
||||
opts->netns_id);
|
||||
if (IS_ERR(nfct)) {
|
||||
opts->error = PTR_ERR(nfct);
|
||||
return NULL;
|
||||
}
|
||||
return nfct;
|
||||
}
|
||||
|
||||
/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
|
||||
* reference to it
|
||||
*
|
||||
* Parameters:
|
||||
* @skb_ctx - Pointer to ctx (__sk_buff) in TC program
|
||||
* Cannot be NULL
|
||||
* @bpf_tuple - Pointer to memory representing the tuple to look up
|
||||
* Cannot be NULL
|
||||
* @tuple__sz - Length of the tuple structure
|
||||
* Must be one of sizeof(bpf_tuple->ipv4) or
|
||||
* sizeof(bpf_tuple->ipv6)
|
||||
* @opts - Additional options for lookup (documented above)
|
||||
* Cannot be NULL
|
||||
* @opts__sz - Length of the bpf_ct_opts structure
|
||||
* Must be NF_BPF_CT_OPTS_SZ (12)
|
||||
*/
|
||||
struct nf_conn *
|
||||
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
|
||||
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
|
||||
struct net *caller_net;
|
||||
struct nf_conn *nfct;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ);
|
||||
|
||||
if (!opts)
|
||||
return NULL;
|
||||
if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
|
||||
opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) {
|
||||
opts->error = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
|
||||
nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto,
|
||||
opts->netns_id);
|
||||
if (IS_ERR(nfct)) {
|
||||
opts->error = PTR_ERR(nfct);
|
||||
return NULL;
|
||||
}
|
||||
return nfct;
|
||||
}
|
||||
|
||||
/* bpf_ct_release - Release acquired nf_conn object
 *
 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
 * the program if any references remain in the program in all of the explored
 * states.
 *
 * Parameters:
 * @nfct	- Pointer to referenced nf_conn object, obtained using
 *		  bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 *		  A NULL pointer is accepted and ignored.
 */
void bpf_ct_release(struct nf_conn *nfct)
{
	if (nfct)
		nf_ct_put(nfct);
}
|
||||
|
||||
__diag_pop()
|
||||
|
||||
/* kfuncs listed for XDP programs; used as .check_set of nf_conntrack_xdp_kfunc_set. */
BTF_SET_START(nf_ct_xdp_check_kfunc_ids)
|
||||
BTF_ID(func, bpf_xdp_ct_lookup)
|
||||
BTF_ID(func, bpf_ct_release)
|
||||
BTF_SET_END(nf_ct_xdp_check_kfunc_ids)
|
||||
|
||||
/* kfuncs listed for TC programs; used as .check_set of nf_conntrack_tc_kfunc_set. */
BTF_SET_START(nf_ct_tc_check_kfunc_ids)
|
||||
BTF_ID(func, bpf_skb_ct_lookup)
|
||||
BTF_ID(func, bpf_ct_release)
|
||||
BTF_SET_END(nf_ct_tc_check_kfunc_ids)
|
||||
|
||||
/* The two lookup kfuncs; used as .acquire_set of both kfunc sets and aliased as nf_ct_ret_null_kfunc_ids below. */
BTF_SET_START(nf_ct_acquire_kfunc_ids)
|
||||
BTF_ID(func, bpf_xdp_ct_lookup)
|
||||
BTF_ID(func, bpf_skb_ct_lookup)
|
||||
BTF_SET_END(nf_ct_acquire_kfunc_ids)
|
||||
|
||||
/* Only bpf_ct_release; used as .release_set of both kfunc sets. */
BTF_SET_START(nf_ct_release_kfunc_ids)
|
||||
BTF_ID(func, bpf_ct_release)
|
||||
BTF_SET_END(nf_ct_release_kfunc_ids)
|
||||
|
||||
/* Both sets are identical */
|
||||
#define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids
|
||||
|
||||
/* kfunc id set that register_nf_conntrack_bpf() registers for BPF_PROG_TYPE_XDP. */
static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.check_set = &nf_ct_xdp_check_kfunc_ids,
|
||||
.acquire_set = &nf_ct_acquire_kfunc_ids,
|
||||
.release_set = &nf_ct_release_kfunc_ids,
|
||||
.ret_null_set = &nf_ct_ret_null_kfunc_ids,
|
||||
};
|
||||
|
||||
/* kfunc id set that register_nf_conntrack_bpf() registers for BPF_PROG_TYPE_SCHED_CLS. */
static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.check_set = &nf_ct_tc_check_kfunc_ids,
|
||||
.acquire_set = &nf_ct_acquire_kfunc_ids,
|
||||
.release_set = &nf_ct_release_kfunc_ids,
|
||||
.ret_null_set = &nf_ct_ret_null_kfunc_ids,
|
||||
};
|
||||
|
||||
int register_nf_conntrack_bpf(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set);
|
||||
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set);
|
||||
}
|
@ -34,6 +34,7 @@
|
||||
#include <linux/rculist_nulls.h>
|
||||
|
||||
#include <net/netfilter/nf_conntrack.h>
|
||||
#include <net/netfilter/nf_conntrack_bpf.h>
|
||||
#include <net/netfilter/nf_conntrack_l4proto.h>
|
||||
#include <net/netfilter/nf_conntrack_expect.h>
|
||||
#include <net/netfilter/nf_conntrack_helper.h>
|
||||
@ -2748,8 +2749,15 @@ int nf_conntrack_init_start(void)
|
||||
conntrack_gc_work_init(&conntrack_gc_work);
|
||||
queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);
|
||||
|
||||
ret = register_nf_conntrack_bpf();
|
||||
if (ret < 0)
|
||||
goto err_kfunc;
|
||||
|
||||
return 0;
|
||||
|
||||
err_kfunc:
|
||||
cancel_delayed_work_sync(&conntrack_gc_work.dwork);
|
||||
nf_conntrack_proto_fini();
|
||||
err_proto:
|
||||
nf_conntrack_seqadj_fini();
|
||||
err_seqadj:
|
||||
|
Loading…
Reference in New Issue
Block a user