Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) A new selftest for nf_queue, from Florian Westphal. This test
   covers two recent fixes: 07f8e4d0fd ("tcp: also NULL skb->dev
   when copy was needed") and b738a185be ("tcp: ensure skb->dev is
   NULL before leaving TCP stack").

2) The fwd action breaks with ifb: set skb->tc_redirected and
   skb->tc_from_ingress so that ifb accepts the forwarded packets.
   To stay on the safe side with respect to future extensions, make
   sure the fwd action only runs from the ingress hook until it is
   extended to work from other hooks.

3) The pipapo set type now reports EEXIST in case of subrange overlaps.
   Update the rbtree set to validate range overlaps as well; so far this
   validation was only done from userspace. From Stefano Brivio.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 6f000f9878 by David S. Miller <davem@davemloft.net>, 2020-03-24 17:30:40 -07:00
8 changed files with 818 additions and 17 deletions

net/netfilter/nf_tables_api.c

@ -5082,6 +5082,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EBUSY;
else if (!(nlmsg_flags & NLM_F_EXCL))
err = 0;
} else if (err == -ENOTEMPTY) {
/* ENOTEMPTY reports overlapping between this element
* and an existing one.
*/
err = -EEXIST;
}
goto err_element_clash;
}
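
A condensed sketch of the convention this hunk establishes: a set back-end's
->insert() keeps returning -EEXIST when an identical element is already
present, and now additionally returns -ENOTEMPTY when the new element merely
overlaps an existing one. The helper below is illustrative only (it omits the
timeout/-EBUSY special case handled above) and is not part of the kernel; it
just restates how nft_add_set_elem() maps the back-end result to what user
space sees.

#include <errno.h>
#include <stdbool.h>

/* Illustrative sketch, not kernel code: map a back-end insert result to the
 * outcome reported over netlink. */
static int insert_result_to_userspace(int err, bool excl /* NLM_F_EXCL set */)
{
	switch (err) {
	case -EEXIST:		/* identical element already in the set */
		return excl ? -EEXIST : 0;	/* silently accepted unless EXCL */
	case -ENOTEMPTY:	/* partial overlap with an existing element */
		return -EEXIST;			/* always an error for user space */
	default:
		return err;			/* 0 on success, or another failure */
	}
}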

net/netfilter/nft_fwd_netdev.c

@ -28,6 +28,10 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
struct nft_fwd_netdev *priv = nft_expr_priv(expr);
int oif = regs->data[priv->sreg_dev];
/* These are used by ifb only. */
pkt->skb->tc_redirected = 1;
pkt->skb->tc_from_ingress = 1;
nf_fwd_netdev_egress(pkt, oif);
regs->verdict.code = NF_STOLEN;
}
@ -190,6 +194,13 @@ nla_put_failure:
return -1;
}
static int nft_fwd_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS));
}
static struct nft_expr_type nft_fwd_netdev_type;
static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.type = &nft_fwd_netdev_type,
@ -197,6 +208,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
.eval = nft_fwd_neigh_eval,
.init = nft_fwd_neigh_init,
.dump = nft_fwd_neigh_dump,
.validate = nft_fwd_validate,
};
static const struct nft_expr_ops nft_fwd_netdev_ops = {
@ -205,6 +217,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.eval = nft_fwd_netdev_eval,
.init = nft_fwd_netdev_init,
.dump = nft_fwd_netdev_dump,
.validate = nft_fwd_validate,
.offload = nft_fwd_netdev_offload,
};
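
The new ->validate callback above is what confines the expression to the
ingress hook. Purely as an illustration of how that restriction could later
be relaxed, as the changelog anticipates, the mask passed to
nft_chain_validate_hooks() would simply grow; NF_NETDEV_EGRESS below is a
hypothetical hook name used only to show the pattern, not something this
kernel defines.

/* Hypothetical sketch, not part of this commit: allow a second netdev-family
 * hook by widening the allowed-hook mask. */
static int nft_fwd_validate_more_hooks(const struct nft_ctx *ctx,
				       const struct nft_expr *expr,
				       const struct nft_data **data)
{
	return nft_chain_validate_hooks(ctx->chain,
					(1 << NF_NETDEV_INGRESS) |
					(1 << NF_NETDEV_EGRESS));
}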

net/netfilter/nft_set_pipapo.c

@ -1098,21 +1098,41 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_field *f;
int i, bsize_max, err = 0;
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
end = (const u8 *)nft_set_ext_key_end(ext)->data;
else
end = start;
dup = pipapo_get(net, set, start, genmask);
if (PTR_ERR(dup) == -ENOENT) {
if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) {
end = (const u8 *)nft_set_ext_key_end(ext)->data;
dup = pipapo_get(net, set, end, nft_genmask_next(net));
} else {
end = start;
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
dup_key = nft_set_ext_key(&dup->ext);
if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END))
dup_end = nft_set_ext_key_end(&dup->ext);
else
dup_end = dup_key;
if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) &&
!memcmp(end, dup_end->data, sizeof(*dup_end->data))) {
*ext2 = &dup->ext;
return -EEXIST;
}
return -ENOTEMPTY;
}
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
dup = pipapo_get(net, set, end, nft_genmask_next(net));
}
if (PTR_ERR(dup) != -ENOENT) {
if (IS_ERR(dup))
return PTR_ERR(dup);
*ext2 = &dup->ext;
return -EEXIST;
return -ENOTEMPTY;
}
/* Validate */
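
The insert path above now distinguishes three outcomes for the new start/end
pair. A compact, userspace-compilable model of just that classification
(plain integer ranges instead of set keys, and not the pipapo lookup itself)
looks like this:

#include <errno.h>

/* Toy model: classify inserting [lo, hi] against one existing range
 * [e_lo, e_hi].  Identical -> -EEXIST, partially overlapping -> -ENOTEMPTY,
 * disjoint -> 0 (insertion proceeds). */
static int range_insert_result(unsigned int e_lo, unsigned int e_hi,
			       unsigned int lo, unsigned int hi)
{
	if (lo == e_lo && hi == e_hi)
		return -EEXIST;		/* same element already present */
	if (lo <= e_hi && e_lo <= hi)
		return -ENOTEMPTY;	/* the ranges share at least one point */
	return 0;			/* no overlap */
}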

net/netfilter/nft_set_rbtree.c

@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
{
return !nft_rbtree_interval_end(rbe);
}
static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
const struct nft_rbtree_elem *interval)
{
@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (interval &&
nft_rbtree_equal(set, this, interval) &&
nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(interval))
nft_rbtree_interval_start(interval))
continue;
interval = rbe;
} else if (d > 0)
@ -89,7 +94,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
!nft_rbtree_interval_end(interval)) {
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
}
@ -208,8 +213,43 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
bool overlap = false;
int d;
/* Detect overlaps as we descend the tree. Set the flag in these cases:
*
* a1. |__ _ _? >|__ _ _ (insert start after existing start)
* a2. _ _ __>| ?_ _ __| (insert end before existing end)
* a3. _ _ ___| ?_ _ _>| (insert end after existing end)
* a4. >|__ _ _ _ _ __| (insert start before existing end)
*
* and clear it later on, as we eventually reach the points indicated by
* '?' above, in the cases described below. We'll always meet these
* later, locally, due to tree ordering, and overlaps for the intervals
* that are the closest together are always evaluated last.
*
* b1. |__ _ _! >|__ _ _ (insert start after existing end)
* b2. _ _ __>| !_ _ __| (insert end before existing start)
* b3. !_____>| (insert end after existing start)
*
* Case a4. resolves to b1.:
* - if the inserted start element is the leftmost, because the '0'
* element in the tree serves as end element
* - otherwise, if an existing end is found. Note that end elements are
* always inserted after corresponding start elements.
*
* For a new, rightmost pair of elements, we'll hit cases b1. and b3.,
* in that order.
*
* The flag is also cleared in two special cases:
*
* b4. |__ _ _!|<_ _ _ (insert start right before existing end)
* b5. |__ _ >|!__ _ _ (insert end right after existing start)
*
* which always happen as last step and imply that no further
* overlapping is possible.
*/
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
@ -218,17 +258,42 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
d = memcmp(nft_set_ext_key(&rbe->ext),
nft_set_ext_key(&new->ext),
set->klen);
if (d < 0)
if (d < 0) {
p = &parent->rb_left;
else if (d > 0)
if (nft_rbtree_interval_start(new)) {
overlap = nft_rbtree_interval_start(rbe) &&
nft_set_elem_active(&rbe->ext,
genmask);
} else {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
genmask);
}
} else if (d > 0) {
p = &parent->rb_right;
else {
if (nft_rbtree_interval_end(new)) {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
genmask);
} else if (nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext, genmask)) {
overlap = true;
}
} else {
if (nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(new)) {
nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
} else if (!nft_rbtree_interval_end(rbe) &&
if (nft_set_elem_active(&rbe->ext, genmask))
overlap = false;
} else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
if (nft_set_elem_active(&rbe->ext, genmask))
overlap = false;
} else if (nft_set_elem_active(&rbe->ext, genmask)) {
*ext = &rbe->ext;
return -EEXIST;
@ -237,6 +302,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
}
}
}
if (overlap)
return -ENOTEMPTY;
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
@ -317,10 +386,10 @@ static void *nft_rbtree_deactivate(const struct net *net,
parent = parent->rb_right;
else {
if (nft_rbtree_interval_end(rbe) &&
!nft_rbtree_interval_end(this)) {
nft_rbtree_interval_start(this)) {
parent = parent->rb_left;
continue;
} else if (!nft_rbtree_interval_end(rbe) &&
} else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
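
As a concrete reading of the case analysis in the comment above, the
classification the rbtree now enforces at insertion time can be spelled out
with arbitrary numbers, reusing the illustrative range_insert_result() helper
sketched in the pipapo section (a worked example, not kernel code):

#include <assert.h>
#include <errno.h>

int main(void)
{
	/* With 10-20 already in an interval set: re-adding the same range is
	 * EEXIST, a partially overlapping 15-25 is ENOTEMPTY (shown to user
	 * space as EEXIST), and a disjoint 30-40 is accepted. */
	assert(range_insert_result(10, 20, 10, 20) == -EEXIST);
	assert(range_insert_result(10, 20, 15, 25) == -ENOTEMPTY);
	assert(range_insert_result(10, 20, 30, 40) == 0);
	return 0;
}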

tools/testing/selftests/netfilter/Makefile

@ -3,6 +3,10 @@
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh
nft_concat_range.sh \
nft_queue.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
include ../lib.mk

tools/testing/selftests/netfilter/config

@ -1,2 +1,8 @@
CONFIG_NET_NS=y
CONFIG_NF_TABLES_INET=y
CONFIG_NFT_QUEUE=m
CONFIG_NFT_NAT=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_FLOW_OFFLOAD=m
CONFIG_NF_CT_NETLINK=m

tools/testing/selftests/netfilter/nf-queue.c

@ -0,0 +1,352 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <arpa/inet.h>
#include <libmnl/libmnl.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
struct options {
bool count_packets;
int verbose;
unsigned int queue_num;
unsigned int timeout;
};
static unsigned int queue_stats[5];
static struct options opts;
static void help(const char *p)
{
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
{
const struct nlattr **tb = data;
int type = mnl_attr_get_type(attr);
/* skip unsupported attribute in user-space */
if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
return MNL_CB_OK;
switch (type) {
case NFQA_MARK:
case NFQA_IFINDEX_INDEV:
case NFQA_IFINDEX_OUTDEV:
case NFQA_IFINDEX_PHYSINDEV:
case NFQA_IFINDEX_PHYSOUTDEV:
if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
perror("mnl_attr_validate");
return MNL_CB_ERROR;
}
break;
case NFQA_TIMESTAMP:
if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
perror("mnl_attr_validate2");
return MNL_CB_ERROR;
}
break;
case NFQA_HWADDR:
if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
sizeof(struct nfqnl_msg_packet_hw)) < 0) {
perror("mnl_attr_validate2");
return MNL_CB_ERROR;
}
break;
case NFQA_PAYLOAD:
break;
}
tb[type] = attr;
return MNL_CB_OK;
}
static int queue_cb(const struct nlmsghdr *nlh, void *data)
{
struct nlattr *tb[NFQA_MAX+1] = { 0 };
struct nfqnl_msg_packet_hdr *ph = NULL;
uint32_t id = 0;
(void)data;
mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
if (tb[NFQA_PACKET_HDR]) {
ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
id = ntohl(ph->packet_id);
if (opts.verbose > 0)
printf("packet hook=%u, hwproto 0x%x",
ph->hook, ntohs(ph->hw_protocol));
if (ph->hook >= 5) {
fprintf(stderr, "Unknown hook %d\n", ph->hook);
return MNL_CB_ERROR;
}
if (opts.verbose > 0) {
uint32_t skbinfo = 0;
if (tb[NFQA_SKB_INFO])
skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
if (skbinfo & NFQA_SKB_CSUMNOTREADY)
printf(" csumnotready");
if (skbinfo & NFQA_SKB_GSO)
printf(" gso");
if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
printf(" csumnotverified");
puts("");
}
if (opts.count_packets)
queue_stats[ph->hook]++;
}
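/* The packet id is folded into the callback return value: anything greater
 * than MNL_CB_STOP keeps mnl_cb_run() going, and mainloop() recovers the id
 * as ret - MNL_CB_OK before building the verdict. */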
return MNL_CB_OK + id;
}
static struct nlmsghdr *
nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
{
struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
struct nfqnl_msg_config_cmd cmd = {
.command = command,
.pf = htons(AF_INET),
};
struct nfgenmsg *nfg;
nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
nlh->nlmsg_flags = NLM_F_REQUEST;
nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
nfg->nfgen_family = AF_UNSPEC;
nfg->version = NFNETLINK_V0;
nfg->res_id = htons(queue_num);
mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
return nlh;
}
static struct nlmsghdr *
nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
{
struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
struct nfqnl_msg_config_params params = {
.copy_range = htonl(range),
.copy_mode = mode,
};
struct nfgenmsg *nfg;
nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
nlh->nlmsg_flags = NLM_F_REQUEST;
nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
nfg->nfgen_family = AF_UNSPEC;
nfg->version = NFNETLINK_V0;
nfg->res_id = htons(queue_num);
mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
return nlh;
}
static struct nlmsghdr *
nfq_build_verdict(char *buf, int id, int queue_num, int verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
.id = htonl(id),
};
struct nlmsghdr *nlh;
struct nfgenmsg *nfg;
nlh = mnl_nlmsg_put_header(buf);
nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
nlh->nlmsg_flags = NLM_F_REQUEST;
nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
nfg->nfgen_family = AF_UNSPEC;
nfg->version = NFNETLINK_V0;
nfg->res_id = htons(queue_num);
mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
return nlh;
}
static void print_stats(void)
{
unsigned int last, total;
int i;
if (!opts.count_packets)
return;
total = 0;
last = queue_stats[0];
for (i = 0; i < 5; i++) {
printf("hook %d packets %08u\n", i, queue_stats[i]);
last = queue_stats[i];
total += last;
}
printf("%u packets total\n", total);
}
struct mnl_socket *open_queue(void)
{
char buf[MNL_SOCKET_BUFFER_SIZE];
unsigned int queue_num;
struct mnl_socket *nl;
struct nlmsghdr *nlh;
struct timeval tv;
uint32_t flags;
nl = mnl_socket_open(NETLINK_NETFILTER);
if (nl == NULL) {
perror("mnl_socket_open");
exit(EXIT_FAILURE);
}
if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
perror("mnl_socket_bind");
exit(EXIT_FAILURE);
}
queue_num = opts.queue_num;
nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
}
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
}
memset(&tv, 0, sizeof(tv));
tv.tv_sec = opts.timeout;
if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
SOL_SOCKET, SO_RCVTIMEO,
&tv, sizeof(tv))) {
perror("setsockopt(SO_RCVTIMEO)");
exit(EXIT_FAILURE);
}
return nl;
}
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
struct mnl_socket *nl;
struct nlmsghdr *nlh;
unsigned int portid;
char *buf;
int ret;
buf = malloc(buflen);
if (!buf) {
perror("malloc");
exit(EXIT_FAILURE);
}
nl = open_queue();
portid = mnl_socket_get_portid(nl);
for (;;) {
uint32_t id;
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
if (errno == ENOBUFS)
continue;
if (errno == EAGAIN) {
errno = 0;
ret = 0;
break;
}
perror("mnl_socket_recvfrom");
exit(EXIT_FAILURE);
}
ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
if (ret < 0) {
perror("mnl_cb_run");
exit(EXIT_FAILURE);
}
id = ret - MNL_CB_OK;
nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
}
}
mnl_socket_close(nl);
return ret;
}
static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
break;
case 'h':
help(argv[0]);
exit(0);
break;
case 'q':
opts.queue_num = atoi(optarg);
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
case 't':
opts.timeout = atoi(optarg);
break;
case 'v':
opts.verbose++;
break;
}
}
}
int main(int argc, char *argv[])
{
int ret;
parse_opts(argc, argv);
ret = mainloop();
if (opts.count_packets)
print_stats();
return ret;
}

tools/testing/selftests/netfilter/nft_queue.sh

@ -0,0 +1,332 @@
#!/bin/bash
#
# This tests nf_queue:
# 1. can process packets from all hooks
# 2. supports running nfqueue from more than one base chain
#
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
cleanup()
{
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
}
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
ip netns add ${nsrouter}
if [ $? -ne 0 ];then
echo "SKIP: Could not create net namespace"
exit $ksft_skip
fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
ip netns add ${ns2}
ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
ip -net ${nsrouter} link set lo up
ip -net ${nsrouter} link set veth0 up
ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
ip -net ${nsrouter} link set veth1 up
ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
ip -net ${ns1} link set lo up
ip -net ${ns1} link set eth0 up
ip -net ${ns2} link set lo up
ip -net ${ns2} link set eth0 up
ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
ip -net ${ns1} addr add dead:1::99/64 dev eth0
ip -net ${ns1} route add default via 10.0.1.1
ip -net ${ns1} route add default via dead:1::1
ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
ip -net ${ns2} addr add dead:2::99/64 dev eth0
ip -net ${ns2} route add default via 10.0.2.1
ip -net ${ns2} route add default via dead:2::1
load_ruleset() {
local name=$1
local prio=$2
ip netns exec ${nsrouter} nft -f - <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
}
chain pre {
type filter hook prerouting priority $prio; policy accept;
jump nfq
}
chain input {
type filter hook input priority $prio; policy accept;
jump nfq
}
chain forward {
type filter hook forward priority $prio; policy accept;
tcp dport 12345 queue num 2
jump nfq
}
chain output {
type filter hook output priority $prio; policy accept;
tcp dport 12345 queue num 3
jump nfq
}
chain post {
type filter hook postrouting priority $prio; policy accept;
jump nfq
}
}
EOF
}
load_counter_ruleset() {
local prio=$1
ip netns exec ${nsrouter} nft -f - <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
counter
}
chain input {
type filter hook input priority $prio; policy accept;
counter
}
chain forward {
type filter hook forward priority $prio; policy accept;
counter
}
chain output {
type filter hook output priority $prio; policy accept;
counter
}
chain post {
type filter hook postrouting priority $prio; policy accept;
counter
}
}
EOF
}
test_ping() {
ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
if [ $? -ne 0 ];then
return 1
fi
ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
if [ $? -ne 0 ];then
return 1
fi
return 0
}
test_ping_router() {
ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
if [ $? -ne 0 ];then
return 1
fi
ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
if [ $? -ne 0 ];then
return 1
fi
return 0
}
test_queue_blackhole() {
local proto=$1
ip netns exec ${nsrouter} nft -f - <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
queue num 600
}
}
EOF
if [ $proto = "ip" ] ;then
ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
fi
# queue without bypass keyword should drop traffic if no listener exists.
if [ $lret -eq 0 ];then
echo "FAIL: $proto expected failure, got $lret" 1>&2
exit 1
fi
ip netns exec ${nsrouter} nft delete table $proto blackh
if [ $? -ne 0 ] ;then
echo "FAIL: $proto: Could not delete blackh table"
exit 1
fi
echo "PASS: $proto: statement with no listener results in packet drop"
}
test_queue()
{
local expected=$1
local last=""
# spawn nf-queue listeners
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
if [ $ret -ne 0 ];then
echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
exit $ret
fi
test_ping_router
ret=$?
if [ $ret -ne 0 ];then
echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
exit $ret
fi
wait
ret=$?
for file in $TMPFILE0 $TMPFILE1; do
last=$(tail -n1 "$file")
if [ x"$last" != x"$expected packets total" ]; then
echo "FAIL: Expected $expected packets total, but got $last" 1>&2
cat "$file" 1>&2
ip netns exec ${nsrouter} nft list ruleset
exit 1
fi
done
echo "PASS: Expected and received $last"
}
test_tcp_forward()
{
ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
sleep 1
ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
local lpid=$!
rm -f "$tmpfile"
wait $rpid
wait $lpid
[ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
}
test_tcp_localhost()
{
tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
local nfqpid=$!
sleep 1
ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
rm -f "$tmpfile"
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
load_ruleset "filter" 0
sleep 3
test_ping
ret=$?
if [ $ret -eq 0 ];then
# queue bypass works (rules were skipped, no listener)
echo "PASS: ${ns1} can reach ${ns2}"
else
echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
exit $ret
fi
test_queue_blackhole ip
test_queue_blackhole ip6
# dummy ruleset to add base chains between the
# queueing rules. We don't want the second reinject
# to re-execute the old hooks.
load_counter_ruleset 10
# we are hooking all: prerouting/input/forward/output/postrouting.
# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
# 1x icmp prerouting,input,output,postrouting -> 4 queue events incl. reply.
# so each of the two nf-queue listeners is expected to receive 6 + 4 = 10 packets.
test_queue 10
# same. We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
test_tcp_forward
test_tcp_localhost
exit $ret