Merge branch 'mptcp-mesh-path-manager'

Mat Martineau says:

====================
mptcp: Add full mesh path manager option

The path manager in MPTCP controls the creation of additional subflows
after the initial connection is created. As each peer advertises
available endpoints with the ADD_ADDR MPTCP option, the recipient of
those advertisements must decide which subflows to create from the known
local and remote interfaces that are available for use by MPTCP.

The existing in-kernel path manager will create one additional subflow
when an ADD_ADDR is received, or a local address is newly configured for
MPTCP use. The maximum number of subflows has a configurable limit.

This patch set adds a MPTCP_PM_ADDR_FLAG_FULLMESH flag to the MPTCP
netlink API that enables subflows to be created more aggressively. When
an ADD_ADDR is received from a peer, new subflows are created between
that address/port and all local addresses configured for MPTCP.
Similarly, when a local address is newly configured for use by
MPTCP, new subflows are created between that local address and all known
remote addresses for that MPTCP connection. The configurable limit on
the number of subflows still applies. If the new flag is not used, the
path manager behavior is unchanged.

Patch 1 adds a helper function and refactors another function to prepare
for the rest of the patch series.

Patches 2 and 3 add two mesh connection capabilities: initiating
subflows based on added local addresses, or reacting to incoming
advertisements.

Patches 4-6 add full mesh cases to the self tests.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-08-18 10:10:01 +01:00
commit 606befcd5d
6 changed files with 231 additions and 26 deletions

View File

@ -73,6 +73,7 @@ enum {
/* Bit flags describing a path-manager address entry.
 * MPTCP_PM_ADDR_FLAG_FULLMESH requests that subflows be created between
 * the flagged local endpoint and all known remote addresses of the
 * connection; the configured subflow limit still applies.
 */
#define MPTCP_PM_ADDR_FLAG_SIGNAL (1 << 0)
#define MPTCP_PM_ADDR_FLAG_SUBFLOW (1 << 1)
#define MPTCP_PM_ADDR_FLAG_BACKUP (1 << 2)
#define MPTCP_PM_ADDR_FLAG_FULLMESH (1 << 3)
enum {
MPTCP_PM_CMD_UNSPEC,

View File

@ -410,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
struct mptcp_addr_info *addr)
{
int i;
for (i = 0; i < nr; i++) {
if (addresses_equal(&addrs[i], addr, addr->port))
return true;
}
return false;
}
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk, *ssk;
struct mptcp_subflow_context *subflow;
struct mptcp_addr_info remote = { 0 };
unsigned int subflows_max;
int i = 0;
subflows_max = mptcp_pm_get_subflows_max(msk);
/* Non-fullmesh endpoint, fill in the single entry
* corresponding to the primary MPC subflow remote address
*/
if (!fullmesh) {
remote_address((struct sock_common *)sk, &remote);
msk->pm.subflows++;
addrs[i++] = remote;
} else {
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &remote);
if (!lookup_address_in_vec(addrs, i, &remote) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
addrs[i++] = remote;
}
}
}
return i;
}
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@ -455,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
struct mptcp_addr_info remote = { 0 };
bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
int i, nr;
msk->pm.local_addr_used++;
msk->pm.subflows++;
check_work_pending(msk);
remote_address((struct sock_common *)sk, &remote);
nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
spin_unlock_bh(&msk->pm.lock);
__mptcp_subflow_connect(sk, &local->addr, &remote,
local->flags, local->ifindex);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
return;
}
@ -484,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
mptcp_pm_create_subflow_or_signal_addr(msk);
}
/* Collect the local addresses to use as subflow sources into addrs[] and
 * return the number of entries written.
 *
 * Every entry of the per-netns local address list that carries
 * MPTCP_PM_ADDR_FLAG_FULLMESH and is family-compatible with the MPTCP
 * socket is appended while msk->pm.subflows is below the subflow limit;
 * msk->pm.subflows is incremented for each appended entry.
 *
 * When nothing was appended, a single all-zero address whose family is
 * msk->pm.remote.family is stored instead, and msk->pm.subflows is
 * incremented once without re-checking the limit.
 *
 * NOTE(review): the capacity of addrs[] is not checked here; presumably
 * the subflow limit never exceeds the caller's array size - confirm.
 */
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
struct mptcp_addr_info local;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
subflows_max = mptcp_pm_get_subflows_max(msk);
/* the local address list is walked under the RCU read lock */
rcu_read_lock();
__mptcp_flush_join_list(msk);
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
/* only endpoints flagged fullmesh take part */
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
/* Family mismatch. With CONFIG_MPTCP_IPV6 the entry is skipped only
 * when the IPv4 side has no v4-mapped IPv6 counterpart on the other
 * side; without it, every mismatching entry is skipped because the
 * 'continue' below becomes unconditional.
 */
if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
if ((entry->addr.family == AF_INET &&
!ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
(sk->sk_family == AF_INET &&
!ipv6_addr_v4mapped(&entry->addr.addr6)))
#endif
continue;
}
/* entries beyond the subflow limit are silently dropped */
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
addrs[i++] = entry->addr;
}
}
rcu_read_unlock();
/* If the array is empty, fall back to a single all-zero
 * ('IPADDRANY') local address of the remote's family.
 */
if (!i) {
memset(&local, 0, sizeof(local));
local.family = msk->pm.remote.family;
msk->pm.subflows++;
addrs[i++] = local;
}
return i;
}
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
struct mptcp_addr_info local;
unsigned int subflows_max;
int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
@ -502,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
goto add_addr_echo;
msk->pm.add_addr_accepted++;
msk->pm.subflows++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
msk->pm.subflows >= subflows_max)
WRITE_ONCE(msk->pm.accept_addr, false);
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
memset(&local, 0, sizeof(local));
local.family = remote.family;
nr = fill_local_addresses_vec(msk, addrs);
msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
msk->pm.subflows >= subflows_max)
WRITE_ONCE(msk->pm.accept_addr, false);
spin_unlock_bh(&msk->pm.lock);
__mptcp_subflow_connect(sk, &local, &remote, 0, 0);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
add_addr_echo:
@ -1105,6 +1208,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
/* Fetch the flags and interface index configured for local address @id
 * in the path-manager address list of @net.
 *
 * Both *@flags and *@ifindex are cleared first and stay zero when @id is
 * 0 or when no entry with that id is found. The lookup runs under the
 * RCU read lock.
 *
 * Always returns 0.
 */
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
					 u8 *flags, int *ifindex)
{
	struct mptcp_pm_addr_entry *match;
	struct pm_nl_pernet *pernet;

	*flags = 0;
	*ifindex = 0;

	/* id 0 has no list entry to consult; keep the zeroed defaults */
	if (!id)
		return 0;

	rcu_read_lock();
	pernet = net_generic(net, pm_nl_pernet_id);
	match = __lookup_addr_by_id(pernet, id);
	if (match) {
		*flags = match->flags;
		*ifindex = match->ifindex;
	}
	rcu_read_unlock();

	return 0;
}
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr)
{

View File

@ -577,8 +577,7 @@ struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote,
u8 flags, int ifindex);
const struct mptcp_addr_info *remote);
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
@ -733,6 +732,8 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr);
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
u8 *flags, int *ifindex);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,

View File

@ -1355,8 +1355,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
}
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote,
u8 flags, int ifindex)
const struct mptcp_addr_info *remote)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_subflow_context *subflow;
@ -1367,6 +1366,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
int err;
if (!mptcp_is_fully_established(sk))
@ -1390,6 +1391,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
local_id = err;
}
mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;

View File

@ -344,17 +344,18 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns1 ]
do
id=${dump[$pos]}
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
let id+=1
let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
@ -366,6 +367,12 @@ do_transfer()
fi
fi
flags="subflow"
if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
flags="${flags},fullmesh"
addr_nr_ns2=${addr_nr_ns2:9}
fi
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
@ -377,7 +384,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
@ -386,17 +393,18 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns2 ]
do
id=${dump[$pos]}
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
let id+=1
let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
@ -1686,6 +1694,55 @@ deny_join_id0_tests()
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
# Exercise endpoints configured with the "fullmesh" flag.
# A "fullmesh_<N>" argument passed to run_tests makes do_transfer strip the
# prefix and add <N> ns2 endpoints with "flags subflow,fullmesh" while the
# transfer is running.
# NOTE(review): "pm_nl_ctl limits A B" is presumably
# <max accepted ADD_ADDR> <max subflows>, and the three chk_join_nr numbers
# presumably the expected SYN / SYN-ACK / ACK join counts - confirm against
# the helpers, which are not visible here.
fullmesh_tests()
{
# fullmesh 1
# 2 fullmesh addrs in ns2, added before the connection,
# 1 non-fullmesh addr in ns1, added during the connection.
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 4
ip netns exec $ns2 ./pm_nl_ctl limits 1 4
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
chk_join_nr "fullmesh test 2x1" 4 4 4
chk_add_nr 1 1
# fullmesh 2
# 1 non-fullmesh addr in ns1, added before the connection,
# 1 fullmesh addr in ns2, added during the connection.
reset
ip netns exec $ns1 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
chk_join_nr "fullmesh test 1x1" 3 3 3
chk_add_nr 1 1
# fullmesh 3
# 1 non-fullmesh addr in ns1, added before the connection,
# 2 fullmesh addrs in ns2, added during the connection.
reset
ip netns exec $ns1 ./pm_nl_ctl limits 2 5
ip netns exec $ns2 ./pm_nl_ctl limits 1 5
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
chk_join_nr "fullmesh test 1x2" 5 5 5
chk_add_nr 1 1
# fullmesh 4
# 1 non-fullmesh addr in ns1, added before the connection,
# 2 fullmesh addrs in ns2, added during the connection,
# limit max_subflows to 4.
# Same setup as fullmesh 3 with a lower subflow limit, so one join fewer
# is expected.
reset
ip netns exec $ns1 ./pm_nl_ctl limits 2 4
ip netns exec $ns2 ./pm_nl_ctl limits 1 4
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
chk_join_nr "fullmesh test 1x2, limited" 4 4 4
chk_add_nr 1 1
}
all_tests()
{
subflows_tests
@ -1701,6 +1758,7 @@ all_tests()
syncookies_tests
checksum_tests
deny_join_id0_tests
fullmesh_tests
}
usage()
@ -1719,6 +1777,7 @@ usage()
echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -d deny_join_id0_tests"
echo " -m fullmesh_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@ -1754,7 +1813,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
while getopts 'fsltra64bpkdchCS' opt; do
while getopts 'fsltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
@ -1795,6 +1854,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
d)
deny_join_id0_tests
;;
m)
fullmesh_tests
;;
c)
;;
C)

View File

@ -25,7 +25,7 @@
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
else if (!strcmp(tok, "fullmesh"))
flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
error(1, errno, "error flag fullmesh");
}
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
printf("fullmesh");
flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
if (flags)
printf(",");
}
/* dump unknown flags, if any */
if (flags)
printf("0x%x", flags);