From 4d25247d3ae486e6e4c59394487fd01523628234 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:31 -0700 Subject: [PATCH 1/7] mptcp: bypass in-kernel PM restrictions for non-kernel PMs Current limits on the # of addresses/subflows must apply only to in-kernel PM managed sockets. Thus this change removes such restrictions on connections overseen by non-kernel (e.g. userspace) PMs. This change also ensures that the kernel does not record stats inside struct mptcp_pm_data updated along kernel code paths when exercised via non-kernel PMs. Additionally, address announcements are acknolwedged and subflow requests are honored only when it's deemed that a userspace path manager is active at the time. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm.c | 15 +++++++++++++-- net/mptcp/pm_netlink.c | 10 ++++++++++ net/mptcp/protocol.h | 6 ++++++ net/mptcp/subflow.c | 4 +++- 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 57f67578a47f..8df9cb28d970 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -87,6 +87,9 @@ bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) unsigned int subflows_max; int ret = 0; + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_active(msk); + subflows_max = mptcp_pm_get_subflows_max(msk); pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, @@ -179,7 +182,8 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, bool update_subflows; update_subflows = (ssk->sk_state == TCP_CLOSE) && - (subflow->request_join || subflow->mp_join); + (subflow->request_join || subflow->mp_join) && + mptcp_pm_is_kernel(msk); if (!READ_ONCE(pm->work_pending) && !update_subflows) return; @@ -208,7 +212,14 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, spin_lock_bh(&pm->lock); - if (!READ_ONCE(pm->accept_addr) || mptcp_pm_is_userspace(msk)) { + if (mptcp_pm_is_userspace(msk)) { + if (mptcp_userspace_pm_active(msk)) { + mptcp_pm_announce_addr(msk, addr, true); + mptcp_pm_add_addr_send_ack(msk); + } else { + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); + } + } else if (!READ_ONCE(pm->accept_addr)) { mptcp_pm_announce_addr(msk, addr, true); mptcp_pm_add_addr_send_ack(msk); } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 473e5aa7baf4..d2b63529bfee 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -805,6 +805,9 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, if (!removed) continue; + if (!mptcp_pm_is_kernel(msk)) + continue; + if (rm_type == MPTCP_MIB_RMADDR) { msk->pm.add_addr_accepted--; WRITE_ONCE(msk->pm.accept_addr, true); @@ -1855,6 +1858,13 @@ static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gf nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); } +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) +{ + return genl_has_listeners(&mptcp_genl_family, + sock_net((const struct sock *)msk), + MPTCP_PM_EV_GRP_OFFSET); +} + static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) { const struct inet_sock *issk = inet_sk(ssk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 54d2b3b2d100..85390146944d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -784,6 +784,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); +bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { @@ -811,6 +812,11 @@ static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; } +static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) +{ + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; +} + static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) { u8 len = TCPOLEN_MPTCP_ADD_ADDR_BASE; diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 75c824b67ca9..9567231a4bfa 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2, static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) { return mptcp_is_fully_established((void *)msk) && - READ_ONCE(msk->pm.accept_subflow); + ((mptcp_pm_is_userspace(msk) && + mptcp_userspace_pm_active(msk)) || + READ_ONCE(msk->pm.accept_subflow)); } /* validate received token and create truncated hmac and nonce for SYN-ACK */ From b3b71bf915210969ef4807e39c8f037c40a05e76 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 2 May 2022 13:52:32 -0700 Subject: [PATCH 2/7] selftests: mptcp: ADD_ADDR echo test with missing userspace daemon Check userspace PM behavior to ensure ADD_ADDR echoes are only sent when there is an active userspace daemon. If the daemon is restarting or hasn't loaded yet, the missing echo will cause the peer to retransmit the ADD_ADDR - and hopefully the daemon will be ready to receive it at that later time. Reviewed-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b27854f976f7..d1de1e7702fb 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2719,6 +2719,17 @@ userspace_tests() chk_add_nr 0 0 fi + # userspace pm type does not echo add_addr without daemon + if reset "userspace pm no echo w/o daemon"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + chk_add_nr 1 0 + fi + # userspace pm type rejects join if reset "userspace pm type rejects join"; then set_userspace_pm $ns1 From 8a348392209ffdd70926253f014eec0c04dbf3e7 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:33 -0700 Subject: [PATCH 3/7] mptcp: store remote id from MP_JOIN SYN/ACK in local ctx This change reads the addr id assigned to the remote endpoint of a subflow from the MP_JOIN SYN/ACK message and stores it in the related subflow context. The remote id was not being captured prior to this change, and will now provide a consistent view of remote endpoints and their ids as seen through netlink events. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9567231a4bfa..a0e7af33fb26 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -443,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->backup = mp_opt.backup; subflow->thmac = mp_opt.thmac; subflow->remote_nonce = mp_opt.nonce; + subflow->remote_id = mp_opt.join_id; pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", subflow, subflow->thmac, subflow->remote_nonce, subflow->backup); From d1ace2d9abf3eb5aaa91621050bfd02695721d18 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:34 -0700 Subject: [PATCH 4/7] mptcp: reflect remote port (not 0) in ANNOUNCED events Per RFC 8684, if no port is specified in an ADD_ADDR message, MPTCP SHOULD attempt to connect to the specified address on the same port as the port that is already in use by the subflow on which the ADD_ADDR signal was sent. To facilitate that, this change reflects the specific remote port in use by that subflow in MPTCP_EVENT_ANNOUNCED events. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 6 ++++-- net/mptcp/pm_netlink.c | 11 ++++++++--- net/mptcp/protocol.h | 4 ++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 88f4ebbd6515..c9625fea3ef9 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1133,7 +1133,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && add_addr_hmac_valid(msk, &mp_opt)) { if (!mp_opt.echo) { - mptcp_pm_add_addr_received(msk, &mp_opt.addr); + mptcp_pm_add_addr_received(sk, &mp_opt.addr); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); } else { mptcp_pm_add_addr_echoed(msk, &mp_opt.addr); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 8df9cb28d970..5d6832c4d9f2 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -200,15 +200,17 @@ void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, spin_unlock_bh(&pm->lock); } -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_pm_data *pm = &msk->pm; pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); - mptcp_event_addr_announced(msk, addr); + mptcp_event_addr_announced(ssk, addr); spin_lock_bh(&pm->lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2b63529bfee..eeaa96bcae6c 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -2019,10 +2019,12 @@ nla_put_failure: kfree_skb(skb); } -void mptcp_event_addr_announced(const struct mptcp_sock *msk, +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info) { - struct net *net = sock_net((const struct sock *)msk); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct net *net = sock_net(ssk); struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2044,7 +2046,10 @@ void mptcp_event_addr_announced(const struct mptcp_sock *msk, if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) goto nla_put_failure; - if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, + info->port == 0 ? + inet_sk(ssk)->inet_dport : + info->port)) goto nla_put_failure; switch (info->family) { diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 85390146944d..a762b789f5ab 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -753,7 +753,7 @@ void mptcp_pm_subflow_established(struct mptcp_sock *msk); bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, const struct mptcp_subflow_context *subflow); -void mptcp_pm_add_addr_received(struct mptcp_sock *msk, +void mptcp_pm_add_addr_received(const struct sock *ssk, const struct mptcp_addr_info *addr); void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, const struct mptcp_addr_info *addr); @@ -782,7 +782,7 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list * void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); -void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); +void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); From 70c708e82606f842dc1bcf0943b8acae30c4088f Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:35 -0700 Subject: [PATCH 5/7] mptcp: establish subflows from either end of connection This change updates internal logic to permit subflows to be established from either the client or server ends of MPTCP connections. This symmetry and added flexibility may be harnessed by PM implementations running on either end in creating new subflows. The essence of this change lies in not relying on the "server_side" flag (which continues to be available if needed). Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 2 +- net/mptcp/protocol.c | 5 +---- net/mptcp/protocol.h | 8 ++++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c9625fea3ef9..e05d9458a025 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -931,7 +931,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && - READ_ONCE(msk->pm.server_side)) + !subflow->request_join) tcp_send_ack(ssk); goto fully_established; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a5d466e6b538..5d529143ad77 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3321,15 +3321,12 @@ bool mptcp_finish_join(struct sock *ssk) return false; } - if (!msk->pm.server_side) + if (!list_empty(&subflow->node)) goto out; if (!mptcp_pm_allow_new_subflow(msk)) goto err_prohibited; - if (WARN_ON_ONCE(!list_empty(&subflow->node))) - goto err_prohibited; - /* active connections are already on conn_list. * If we can't acquire msk socket lock here, let the release callback * handle it diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a762b789f5ab..187c932deef0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -911,13 +911,17 @@ static inline bool mptcp_check_infinite_map(struct sk_buff *skb) return false; } +static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) +{ + return (subflow->request_mptcp || subflow->request_join); +} + static inline bool subflow_simultaneous_connect(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct sock *parent = subflow->conn; return sk->sk_state == TCP_ESTABLISHED && - !mptcp_sk(parent)->pm.server_side && + is_active_ssk(subflow) && !subflow->conn_finished; } From 41b3c69bf9414375319290c59f198ff5c71d273f Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:36 -0700 Subject: [PATCH 6/7] mptcp: expose server_side attribute in MPTCP netlink events This change records the 'server_side' attribute of MPTCP_EVENT_CREATED and MPTCP_EVENT_ESTABLISHED events to inform their recipient about the Client/Server role of the running MPTCP application. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/246 Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 1 + net/mptcp/pm_netlink.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 9690efedb5fa..e41ea01a94bb 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -188,6 +188,7 @@ enum mptcp_event_attr { MPTCP_ATTR_IF_IDX, /* s32 */ MPTCP_ATTR_RESET_REASON,/* u32 */ MPTCP_ATTR_RESET_FLAGS, /* u32 */ + MPTCP_ATTR_SERVER_SIDE, /* u8 */ __MPTCP_ATTR_AFTER_LAST }; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index eeaa96bcae6c..a4430c576ce9 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1985,6 +1985,9 @@ static int mptcp_event_created(struct sk_buff *skb, if (err) return err; + if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) + return -EMSGSIZE; + return mptcp_event_add_subflow(skb, ssk); } From 304ab97f4c7c49dc6a1d1dac04cabba602a25c43 Mon Sep 17 00:00:00 2001 From: Kishen Maloor Date: Mon, 2 May 2022 13:52:37 -0700 Subject: [PATCH 7/7] mptcp: allow ADD_ADDR reissuance by userspace PMs This change allows userspace PM implementations to reissue ADD_ADDR announcements (if necessary) based on their chosen policy. Acked-by: Paolo Abeni Signed-off-by: Kishen Maloor Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a4430c576ce9..98b205c2c101 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -369,8 +369,16 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, lockdep_assert_held(&msk->pm.lock); - if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) - return false; + add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); + + if (add_entry) { + if (mptcp_pm_is_kernel(msk)) + return false; + + sk_reset_timer(sk, &add_entry->add_timer, + jiffies + mptcp_get_add_addr_timeout(net)); + return true; + } add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); if (!add_entry)