Merge tag 'net-5.16-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking fixes from Jakub Kicinski:
 "Networking fixes, including fixes from netfilter.

  Current release - regressions:

   - r8169: fix incorrect mac address assignment

   - vlan: fix underflow for the real_dev refcnt when vlan creation
     fails

   - smc: avoid warning of possible recursive locking

  Current release - new code bugs:

   - vsock/virtio: suppress used length validation

   - neigh: fix crash in v6 module initialization error path

  Previous releases - regressions:

   - af_unix: fix change in behavior in read after shutdown

   - igb: fix netpoll exit with traffic, avoid warning

   - tls: fix splice_read() when starting mid-record

   - lan743x: fix deadlock in lan743x_phy_link_status_change()

   - marvell: prestera: fix bridge port operation

  Previous releases - always broken:

   - tcp_cubic: fix spurious Hystart ACK train detections for
     not-cwnd-limited flows

   - nexthop: fix refcount issues when replacing IPv6 groups

   - nexthop: fix null pointer dereference when IPv6 is not enabled

   - phylink: force link down and retrigger resolve on interface change

   - mptcp: fix delack timer length calculation and incorrect early
     clearing

   - ieee802154: handle iftypes as u32, prevent shift-out-of-bounds

   - nfc: virtual_ncidev: change default device permissions

   - netfilter: ctnetlink: fix error codes and flags used for kernel
     side filtering of dumps

   - netfilter: flowtable: fix IPv6 tunnel addr match

   - ncsi: align payload to 32-bit to fix dropped packets

   - iavf: fix deadlock and loss of config during VF interface reset

   - ice: avoid bpf_prog refcount underflow

   - ocelot: fix broken PTP over IP and PTP API violations

  Misc:

   - marvell: mvpp2: increase MTU limit when XDP enabled"

* tag 'net-5.16-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (94 commits)
  net: dsa: microchip: implement multi-bridge support
  net: mscc: ocelot: correctly report the timestamping RX filters in ethtool
  net: mscc: ocelot: set up traps for PTP packets
  net: ptp: add a definition for the UDP port for IEEE 1588 general messages
  net: mscc: ocelot: create a function that replaces an existing VCAP filter
  net: mscc: ocelot: don't downgrade timestamping RX filters in SIOCSHWTSTAMP
  net: hns3: fix incorrect components info of ethtool --reset command
  net: hns3: fix one incorrect value of page pool info when queried by debugfs
  net: hns3: add check NULL address for page pool
  net: hns3: fix VF RSS failed problem after PF enable multi-TCs
  net: qed: fix the array may be out of bound
  net/smc: Don't call clcsock shutdown twice when smc shutdown
  net: vlan: fix underflow for the real_dev refcnt
  ptp: fix filter names in the documentation
  ethtool: ioctl: fix potential NULL deref in ethtool_set_coalesce()
  nfc: virtual_ncidev: change default device permissions
  net/sched: sch_ets: don't peek at classes beyond 'nbands'
  net: stmmac: Disable Tx queues when reconfiguring the interface
  selftests: tls: test for correct proto_ops
  tls: fix replacing proto_ops
  ...
This commit is contained in:
Linus Torvalds
2021-11-26 12:58:53 -08:00
89 changed files with 1965 additions and 668 deletions

View File

@@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest

View File

@@ -629,6 +629,66 @@ ipv6_fcnal()
log_test $? 0 "Nexthops removed on admin down"
}
ipv6_grp_refs()
{
if [ ! -x "$(command -v mausezahn)" ]; then
echo "SKIP: Could not run test; need mausezahn tool"
return
fi
run_cmd "$IP link set dev veth1 up"
run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
run_cmd "$IP nexthop add id 102 group 100"
run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
# create per-cpu dsts through nh 100
run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
# remove nh 100 from the group to delete the route potentially leaving
# a stale per-cpu dst which holds a reference to the nexthop's net
# device and to the IPv6 route
run_cmd "$IP nexthop replace id 102 group 101"
run_cmd "$IP route del 2001:db8:101::1/128"
# add both nexthops to the group so a reference is taken on them
run_cmd "$IP nexthop replace id 102 group 100/101"
# if the bug described in commit "net: nexthop: release IPv6 per-cpu
# dsts when replacing a nexthop group" exists at this point we have
# an unlinked IPv6 route (but not freed due to stale dst) with a
# reference over the group so we delete the group which will again
# only unlink it due to the route reference
run_cmd "$IP nexthop del id 102"
# delete the nexthop with stale dst, since we have an unlinked
# group with a ref to it and an unlinked IPv6 route with ref to the
# group, the nh will only be unlinked and not freed so the stale dst
# remains forever and we get a net device refcount imbalance
run_cmd "$IP nexthop del id 100"
# if a reference was lost this command will hang because the net device
# cannot be removed
timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
# we can't cleanup if the command is hung trying to delete the netdev
if [ $? -eq 137 ]; then
return 1
fi
# cleanup
run_cmd "$IP link del veth1.20"
run_cmd "$IP nexthop flush"
return 0
}
ipv6_grp_fcnal()
{
local rc
@@ -734,6 +794,9 @@ ipv6_grp_fcnal()
run_cmd "$IP nexthop add id 108 group 31/24"
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
ipv6_grp_refs
log_test $? 0 "Nexthop group replace refcounts"
}
ipv6_res_grp_fcnal()

View File

@@ -78,26 +78,21 @@ static void memrnd(void *s, size_t n)
*byte++ = rand();
}
FIXTURE(tls_basic)
{
int fd, cfd;
bool notls;
};
FIXTURE_SETUP(tls_basic)
static void ulp_sock_pair(struct __test_metadata *_metadata,
int *fd, int *cfd, bool *notls)
{
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
self->notls = false;
*notls = false;
len = sizeof(addr);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = 0;
self->fd = socket(AF_INET, SOCK_STREAM, 0);
*fd = socket(AF_INET, SOCK_STREAM, 0);
sfd = socket(AF_INET, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
@@ -108,26 +103,96 @@ FIXTURE_SETUP(tls_basic)
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = connect(self->fd, &addr, sizeof(addr));
ret = connect(*fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
self->cfd = accept(sfd, &addr, &len);
ASSERT_GE(self->cfd, 0);
*cfd = accept(sfd, &addr, &len);
ASSERT_GE(*cfd, 0);
close(sfd);
ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret != 0) {
ASSERT_EQ(errno, ENOENT);
self->notls = true;
*notls = true;
printf("Failure setting TCP_ULP, testing without tls\n");
return;
}
ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
ASSERT_EQ(ret, 0);
}
/* Produce a basic cmsg */
static int tls_send_cmsg(int fd, unsigned char record_type,
void *data, size_t len, int flags)
{
char cbuf[CMSG_SPACE(sizeof(char))];
int cmsg_len = sizeof(char);
struct cmsghdr *cmsg;
struct msghdr msg;
struct iovec vec;
vec.iov_base = data;
vec.iov_len = len;
memset(&msg, 0, sizeof(struct msghdr));
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_TLS;
/* test sending non-record types. */
cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
cmsg->cmsg_len = CMSG_LEN(cmsg_len);
*CMSG_DATA(cmsg) = record_type;
msg.msg_controllen = cmsg->cmsg_len;
return sendmsg(fd, &msg, flags);
}
static int tls_recv_cmsg(struct __test_metadata *_metadata,
int fd, unsigned char record_type,
void *data, size_t len, int flags)
{
char cbuf[CMSG_SPACE(sizeof(char))];
struct cmsghdr *cmsg;
unsigned char ctype;
struct msghdr msg;
struct iovec vec;
int n;
vec.iov_base = data;
vec.iov_len = len;
memset(&msg, 0, sizeof(struct msghdr));
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);
n = recvmsg(fd, &msg, flags);
cmsg = CMSG_FIRSTHDR(&msg);
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
ctype = *((unsigned char *)CMSG_DATA(cmsg));
EXPECT_EQ(ctype, record_type);
return n;
}
FIXTURE(tls_basic)
{
int fd, cfd;
bool notls;
};
FIXTURE_SETUP(tls_basic)
{
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
}
FIXTURE_TEARDOWN(tls_basic)
{
close(self->fd);
@@ -199,60 +264,21 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
FIXTURE_SETUP(tls)
{
struct tls_crypto_info_keys tls12;
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
self->notls = false;
len = sizeof(addr);
int ret;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
&tls12);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = 0;
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
self->fd = socket(AF_INET, SOCK_STREAM, 0);
sfd = socket(AF_INET, SOCK_STREAM, 0);
if (self->notls)
return;
ret = bind(sfd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = listen(sfd, 10);
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
ret = getsockname(sfd, &addr, &len);
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
ret = connect(self->fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret != 0) {
self->notls = true;
printf("Failure setting TCP_ULP, testing without tls\n");
}
if (!self->notls) {
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
tls12.len);
ASSERT_EQ(ret, 0);
}
self->cfd = accept(sfd, &addr, &len);
ASSERT_GE(self->cfd, 0);
if (!self->notls) {
ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
sizeof("tls"));
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
tls12.len);
ASSERT_EQ(ret, 0);
}
close(sfd);
}
FIXTURE_TEARDOWN(tls)
@@ -613,6 +639,95 @@ TEST_F(tls, splice_to_pipe)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
TEST_F(tls, splice_cmsg_to_pipe)
{
char *test_str = "test_read";
char record_type = 100;
int send_len = 10;
char buf[10];
int p[2];
ASSERT_GE(pipe(p), 0);
EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
EXPECT_EQ(errno, EINVAL);
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
EXPECT_EQ(errno, EIO);
EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
buf, sizeof(buf), MSG_WAITALL),
send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
}
TEST_F(tls, splice_dec_cmsg_to_pipe)
{
char *test_str = "test_read";
char record_type = 100;
int send_len = 10;
char buf[10];
int p[2];
ASSERT_GE(pipe(p), 0);
EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
EXPECT_EQ(errno, EIO);
EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
EXPECT_EQ(errno, EINVAL);
EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
buf, sizeof(buf), MSG_WAITALL),
send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
}
TEST_F(tls, recv_and_splice)
{
int send_len = TLS_PAYLOAD_MAX_LEN;
char mem_send[TLS_PAYLOAD_MAX_LEN];
char mem_recv[TLS_PAYLOAD_MAX_LEN];
int half = send_len / 2;
int p[2];
ASSERT_GE(pipe(p), 0);
EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
/* Recv hald of the record, splice the other half */
EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
half);
EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
TEST_F(tls, peek_and_splice)
{
int send_len = TLS_PAYLOAD_MAX_LEN;
char mem_send[TLS_PAYLOAD_MAX_LEN];
char mem_recv[TLS_PAYLOAD_MAX_LEN];
int chunk = TLS_PAYLOAD_MAX_LEN / 4;
int n, i, p[2];
memrnd(mem_send, sizeof(mem_send));
ASSERT_GE(pipe(p), 0);
for (i = 0; i < 4; i++)
EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
chunk);
EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
MSG_WAITALL | MSG_PEEK),
chunk * 5 / 2);
EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
n = 0;
while (n < send_len) {
i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
EXPECT_GT(i, 0);
n += i;
}
EXPECT_EQ(n, send_len);
EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
TEST_F(tls, recvmsg_single)
{
char const *test_str = "test_recvmsg_single";
@@ -1193,60 +1308,30 @@ TEST_F(tls, mutliproc_sendpage_writers)
TEST_F(tls, control_msg)
{
if (self->notls)
return;
char cbuf[CMSG_SPACE(sizeof(char))];
char const *test_str = "test_read";
int cmsg_len = sizeof(char);
char *test_str = "test_read";
char record_type = 100;
struct cmsghdr *cmsg;
struct msghdr msg;
int send_len = 10;
struct iovec vec;
char buf[10];
vec.iov_base = (char *)test_str;
vec.iov_len = 10;
memset(&msg, 0, sizeof(struct msghdr));
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
msg.msg_control = cbuf;
msg.msg_controllen = sizeof(cbuf);
cmsg = CMSG_FIRSTHDR(&msg);
cmsg->cmsg_level = SOL_TLS;
/* test sending non-record types. */
cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
cmsg->cmsg_len = CMSG_LEN(cmsg_len);
*CMSG_DATA(cmsg) = record_type;
msg.msg_controllen = cmsg->cmsg_len;
if (self->notls)
SKIP(return, "no TLS support");
EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
send_len);
/* Should fail because we didn't provide a control message */
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
vec.iov_base = buf;
EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
cmsg = CMSG_FIRSTHDR(&msg);
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
record_type = *((unsigned char *)CMSG_DATA(cmsg));
EXPECT_EQ(record_type, 100);
EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
/* Recv the message again without MSG_PEEK */
record_type = 0;
memset(buf, 0, sizeof(buf));
EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
cmsg = CMSG_FIRSTHDR(&msg);
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
record_type = *((unsigned char *)CMSG_DATA(cmsg));
EXPECT_EQ(record_type, 100);
EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
buf, sizeof(buf), MSG_WAITALL),
send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
@@ -1301,6 +1386,160 @@ TEST_F(tls, shutdown_reuse)
EXPECT_EQ(errno, EISCONN);
}
FIXTURE(tls_err)
{
int fd, cfd;
int fd2, cfd2;
bool notls;
};
FIXTURE_VARIANT(tls_err)
{
uint16_t tls_version;
};
FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
{
.tls_version = TLS_1_2_VERSION,
};
FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
{
.tls_version = TLS_1_3_VERSION,
};
FIXTURE_SETUP(tls_err)
{
struct tls_crypto_info_keys tls12;
int ret;
tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
&tls12);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
if (self->notls)
return;
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
}
FIXTURE_TEARDOWN(tls_err)
{
close(self->fd);
close(self->cfd);
close(self->fd2);
close(self->cfd2);
}
TEST_F(tls_err, bad_rec)
{
char buf[64];
if (self->notls)
SKIP(return, "no TLS support");
memset(buf, 0x55, sizeof(buf));
EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EMSGSIZE);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
EXPECT_EQ(errno, EAGAIN);
}
TEST_F(tls_err, bad_auth)
{
char buf[128];
int n;
if (self->notls)
SKIP(return, "no TLS support");
memrnd(buf, sizeof(buf) / 2);
EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
n = recv(self->cfd, buf, sizeof(buf), 0);
EXPECT_GT(n, sizeof(buf) / 2);
buf[n - 1]++;
EXPECT_EQ(send(self->fd2, buf, n, 0), n);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
}
TEST_F(tls_err, bad_in_large_read)
{
char txt[3][64];
char cip[3][128];
char buf[3 * 128];
int i, n;
if (self->notls)
SKIP(return, "no TLS support");
/* Put 3 records in the sockets */
for (i = 0; i < 3; i++) {
memrnd(txt[i], sizeof(txt[i]));
EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
sizeof(txt[i]));
n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
EXPECT_GT(n, sizeof(txt[i]));
/* Break the third message */
if (i == 2)
cip[2][n - 1]++;
EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
}
/* We should be able to receive the first two messages */
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
/* Third mesasge is bad */
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
}
TEST_F(tls_err, bad_cmsg)
{
char *test_str = "test_read";
int send_len = 10;
char cip[128];
char buf[128];
char txt[64];
int n;
if (self->notls)
SKIP(return, "no TLS support");
/* Queue up one data record */
memrnd(txt, sizeof(txt));
EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
n = recv(self->cfd, cip, sizeof(cip), 0);
EXPECT_GT(n, sizeof(txt));
EXPECT_EQ(send(self->fd2, cip, n, 0), n);
EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
n = recv(self->cfd, cip, sizeof(cip), 0);
cip[n - 1]++; /* Break it */
EXPECT_GT(n, send_len);
EXPECT_EQ(send(self->fd2, cip, n, 0), n);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
EXPECT_EQ(errno, EBADMSG);
}
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -1355,64 +1594,82 @@ TEST(non_established) {
TEST(keysizes) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
int sfd, ret, fd, cfd;
socklen_t len;
int ret, fd, cfd;
bool notls;
notls = false;
len = sizeof(addr);
memset(&tls12, 0, sizeof(tls12));
tls12.info.version = TLS_1_2_VERSION;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = 0;
fd = socket(AF_INET, SOCK_STREAM, 0);
sfd = socket(AF_INET, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = listen(sfd, 10);
ASSERT_EQ(ret, 0);
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = connect(fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret != 0) {
notls = true;
printf("Failure setting TCP_ULP, testing without tls\n");
}
ulp_sock_pair(_metadata, &fd, &cfd, &notls);
if (!notls) {
ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
sizeof(tls12));
EXPECT_EQ(ret, 0);
}
cfd = accept(sfd, &addr, &len);
ASSERT_GE(cfd, 0);
if (!notls) {
ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
sizeof("tls"));
EXPECT_EQ(ret, 0);
ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
sizeof(tls12));
EXPECT_EQ(ret, 0);
}
close(sfd);
close(fd);
close(cfd);
}
TEST(tls_v6ops) {
struct tls_crypto_info_keys tls12;
struct sockaddr_in6 addr, addr2;
int sfd, ret, fd;
socklen_t len, len2;
tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
addr.sin6_family = AF_INET6;
addr.sin6_addr = in6addr_any;
addr.sin6_port = 0;
fd = socket(AF_INET6, SOCK_STREAM, 0);
sfd = socket(AF_INET6, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = listen(sfd, 10);
ASSERT_EQ(ret, 0);
len = sizeof(addr);
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = connect(fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
len = sizeof(addr);
ret = getsockname(fd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret) {
ASSERT_EQ(errno, ENOENT);
SKIP(return, "no TLS support");
}
ASSERT_EQ(ret, 0);
ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
ASSERT_EQ(ret, 0);
len2 = sizeof(addr2);
ret = getsockname(fd, &addr2, &len2);
ASSERT_EQ(ret, 0);
EXPECT_EQ(len2, len);
EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
close(fd);
close(sfd);
}
TEST_HARNESS_MAIN

View File

@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue

View File

@@ -0,0 +1,219 @@
#!/bin/sh
# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
# pointing at the VRF device.
#
# For ingress, this means first iteration has iifname of lower/real
# device. In this script, thats veth0.
# Second iteration is iifname set to vrf device, tvrf in this script.
#
# For egress, this is reversed: first iteration has the vrf device,
# second iteration is done with the lower/real/veth0 device.
#
# test_ct_zone_in demonstrates unexpected change of nftables
# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
# connection on VRF rcv"
#
# It was possible to assign conntrack zone to a packet (or mark it for
# `notracking`) in the prerouting chain before conntrack, based on real iif.
#
# After the change, the zone assignment is lost and the zone is assigned based
# on the VRF master interface (in case such a rule exists).
# assignment is lost. Instead, assignment based on the `iif` matching
# Thus it is impossible to distinguish packets based on the original
# interface.
#
# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.
ksft_skip=4
IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30
ret=0
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
ns1="ns1-$sfx"
cleanup()
{
ip netns pids $ns0 | xargs kill 2>/dev/null
ip netns pids $ns1 | xargs kill 2>/dev/null
ip netns del $ns0 $ns1
}
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
ip netns add "$ns0"
if [ $? -ne 0 ];then
echo "SKIP: Could not create net namespace $ns0"
exit $ksft_skip
fi
ip netns add "$ns1"
trap cleanup EXIT
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not add veth device"
exit $ksft_skip
fi
ip -net $ns0 li add tvrf type vrf table 9876
if [ $? -ne 0 ];then
echo "SKIP: Could not add vrf device"
exit $ksft_skip
fi
ip -net $ns0 li set lo up
ip -net $ns0 li set veth0 master tvrf
ip -net $ns0 li set tvrf up
ip -net $ns0 li set veth0 up
ip -net $ns1 li set veth0 up
ip -net $ns0 addr add $IP0/$PFXL dev veth0
ip -net $ns1 addr add $IP1/$PFXL dev veth0
ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
if [ $? -ne 0 ];then
echo "SKIP: Could not start iperf3"
exit $ksft_skip
fi
# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
test_ct_zone_in()
{
ip netns exec $ns0 nft -f - <<EOF
table testct {
chain rawpre {
type filter hook prerouting priority raw;
iif { veth0, tvrf } counter meta nftrace set 1
iif veth0 counter ct zone set 1 counter return
iif tvrf counter ct zone set 2 counter return
ip protocol icmp counter
notrack counter
}
chain rawout {
type filter hook output priority raw;
oif veth0 counter ct zone set 1 counter return
oif tvrf counter ct zone set 2 counter return
notrack counter
}
}
EOF
ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
# should be in zone 1, not zone 2
count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
if [ $count -eq 1 ]; then
echo "PASS: entry found in conntrack zone 1"
else
echo "FAIL: entry not found in conntrack zone 1"
count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
if [ $count -eq 1 ]; then
echo "FAIL: entry found in zone 2 instead"
else
echo "FAIL: entry not in zone 1 or 2, dumping table"
ip netns exec $ns0 conntrack -L
ip netns exec $ns0 nft list ruleset
fi
fi
}
# add masq rule that gets evaluated w. outif set to vrf device.
# This tests the first iteration of the packet through conntrack,
# oifname is the vrf device.
test_masquerade_vrf()
{
ip netns exec $ns0 conntrack -F 2>/dev/null
ip netns exec $ns0 nft -f - <<EOF
flush ruleset
table ip nat {
chain postrouting {
type nat hook postrouting priority 0;
# NB: masquerade should always be combined with 'oif(name) bla',
# lack of this is intentional here, we want to exercise double-snat.
ip saddr 172.30.30.0/30 counter masquerade random
}
}
EOF
ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
if [ $? -ne 0 ]; then
echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
if [ $? -eq 0 ]; then
echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
else
echo "FAIL: vrf masq rule has unexpected counter value"
ret=1
fi
}
# add masq rule that gets evaluated w. outif set to veth device.
# This tests the 2nd iteration of the packet through conntrack,
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
ip netns exec $ns0 conntrack -F 2>/dev/null
ip netns exec $ns0 nft -f - <<EOF
flush ruleset
table ip nat {
chain postrouting {
type nat hook postrouting priority 0;
meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
}
}
EOF
ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
if [ $? -ne 0 ]; then
echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
if [ $? -eq 0 ]; then
echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
else
echo "FAIL: vrf masq rule has unexpected counter value"
ret=1
fi
}
test_ct_zone_in
test_masquerade_vrf
test_masquerade_veth
exit $ret

View File

@@ -759,19 +759,21 @@ test_port_shadow()
local result=""
local logmsg=""
echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_r=$!
echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
nc_c=$!
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
sc_r=$!
echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
sc_c=$!
sleep 0.3
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
if [ "$result" = "$expect" ] ;then
echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@ test_port_shadow()
ret=1
fi
kill $nc_r $nc_c 2>/dev/null
kill $sc_r $sc_c 2>/dev/null
# flush udp entries for next test round, if any
ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@ table $family raw {
chain prerouting {
type filter hook prerouting priority -300; policy accept;
meta iif veth0 udp dport 1405 notrack
udp dport 1405 notrack
}
chain output {
type filter hook output priority -300; policy accept;
udp sport 1405 notrack
meta oif veth0 udp sport 1405 notrack
}
}
EOF
@@ -851,6 +852,18 @@ test_port_shadowing()
{
local family="ip"
conntrack -h >/dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run nat port shadowing test without conntrack tool"
return
fi
socat -h > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run nat port shadowing test without socat tool"
return
fi
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null

View File

@@ -16,6 +16,10 @@ timeout=4
cleanup()
{
ip netns pids ${ns1} | xargs kill 2>/dev/null
ip netns pids ${ns2} | xargs kill 2>/dev/null
ip netns pids ${nsrouter} | xargs kill 2>/dev/null
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
@@ -332,6 +336,55 @@ EOF
echo "PASS: tcp via loopback and re-queueing"
}
test_icmp_vrf() {
ip -net $ns1 link add tvrf type vrf table 9876
if [ $? -ne 0 ];then
echo "SKIP: Could not add vrf device"
return
fi
ip -net $ns1 li set eth0 master tvrf
ip -net $ns1 li set tvrf up
ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
ip netns exec ${ns1} nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
chain output {
type filter hook output priority 0; policy accept;
meta oifname "tvrf" icmp type echo-request counter queue num 1
meta oifname "eth0" icmp type echo-request counter queue num 1
}
chain post {
type filter hook postrouting priority 0; policy accept;
meta oifname "tvrf" icmp type echo-request counter queue num 1
meta oifname "eth0" icmp type echo-request counter queue num 1
}
}
EOF
ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
local nfqpid=$!
sleep 1
ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
for n in output post; do
for d in tvrf eth0; do
ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
if [ $? -ne 0 ] ; then
echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
ip netns exec ${ns1} nft list ruleset
ret=1
return
fi
done
done
wait $nfqpid
[ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
wait 2>/dev/null
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_requeue
test_icmp_vrf
exit $ret

View File

@@ -68,7 +68,7 @@
"cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
"matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf"

View File

@@ -15,7 +15,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "4",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -37,7 +37,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
"matchCount": "256",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -60,7 +60,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "4",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -82,7 +82,7 @@
"cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -106,7 +106,7 @@
"cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -128,7 +128,7 @@
"cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
"expExitCode": "2",
"verifyCmd": "$TC qdisc show dev $ETH",
"matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
"matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
"matchCount": "0",
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"