From e98014306840f58072f50a55ad49400f227a5b65 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:54 -0800 Subject: [PATCH 1/7] mptcp: move pm netlink work into pm_netlink Allows to make some functions static and avoids acquire of the pm spinlock in protocol.c. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 45 +++++++++++++++++++++++++++++++++++++----- net/mptcp/protocol.c | 33 +------------------------------ net/mptcp/protocol.h | 6 +----- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 23780a13b934..8f2fd6874d85 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -56,6 +56,8 @@ struct pm_nl_pernet { #define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3 +static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); + static bool addresses_equal(const struct mptcp_addr_info *a, struct mptcp_addr_info *b, bool use_port) { @@ -448,17 +450,17 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } } -void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) +static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } -void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) +static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) { mptcp_pm_create_subflow_or_signal_addr(msk); } -void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; unsigned int add_addr_accept_max; @@ -498,7 +500,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_add_addr_send_ack(msk); } -void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) +static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -568,7 +570,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, return -EINVAL; } -void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) +static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; @@ -605,6 +607,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) } } +void mptcp_pm_nl_work(struct mptcp_sock *msk) +{ + struct mptcp_pm_data *pm = &msk->pm; + + msk_owned_by_me(msk); + + spin_lock_bh(&msk->pm.lock); + + pr_debug("msk=%p status=%x", msk, pm->status); + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); + mptcp_pm_nl_add_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); + mptcp_pm_nl_add_addr_send_ack(msk); + } + if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { + pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); + mptcp_pm_nl_rm_addr_received(msk); + } + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); + mptcp_pm_nl_fully_established(msk); + } + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); + mptcp_pm_nl_subflow_established(msk); + } + + spin_unlock_bh(&msk->pm.lock); +} + void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) { struct mptcp_subflow_context *subflow, *tmp; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b9f16a1535d2..93134b72490a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2152,37 +2152,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) return 0; } -static void pm_work(struct mptcp_sock *msk) -{ - struct mptcp_pm_data *pm = &msk->pm; - - spin_lock_bh(&msk->pm.lock); - - pr_debug("msk=%p status=%x", msk, pm->status); - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); - mptcp_pm_nl_add_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); - mptcp_pm_nl_add_addr_send_ack(msk); - } - if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { - pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); - mptcp_pm_nl_rm_addr_received(msk); - } - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); - mptcp_pm_nl_fully_established(msk); - } - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { - pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); - mptcp_pm_nl_subflow_established(msk); - } - - spin_unlock_bh(&msk->pm.lock); -} - static void __mptcp_close_subflow(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; @@ -2271,7 +2240,7 @@ static void mptcp_worker(struct work_struct *work) __mptcp_close_subflow(msk); if (msk->pm.status) - pm_work(msk); + mptcp_pm_nl_work(msk); if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 73a923d02aad..702dbfefa093 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -713,11 +713,7 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_data_init(struct mptcp_sock *msk); -void mptcp_pm_nl_fully_established(struct mptcp_sock *msk); -void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk); -void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk); -void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk); +void mptcp_pm_nl_work(struct mptcp_sock *msk); void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk); From a141e02e393370e082b25636401c49978b61bfcf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:55 -0800 Subject: [PATCH 2/7] mptcp: split __mptcp_close_ssk helper Prepare for subflow close events: When mptcp connection is torn down its enough to send the mptcp socket close notification rather than a subflow close event for all of the subflows followed by the mptcp close event. This splits the helper: mptcp_close_ssk() will emit the close notification, __mptcp_close_ssk will not. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 4 ++-- net/mptcp/protocol.c | 12 +++++++++--- net/mptcp/protocol.h | 4 ++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 8f2fd6874d85..c3abff40fa4e 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -594,7 +594,7 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.add_addr_accepted--; @@ -664,7 +664,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, how); - __mptcp_close_ssk(sk, ssk, subflow); + mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); msk->pm.local_addr_used--; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 93134b72490a..3fd8aef979a3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2114,8 +2114,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) * so we need to use tcp_close() after detaching them from the mptcp * parent socket. */ -void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow) +static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow) { list_del(&subflow->node); @@ -2147,6 +2147,12 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, sock_put(ssk); } +void mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow) +{ + __mptcp_close_ssk(sk, ssk, subflow); +} + static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) { return 0; @@ -2164,7 +2170,7 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (inet_sk_state_load(ssk) != TCP_CLOSE) continue; - __mptcp_close_ssk((struct sock *)msk, ssk, subflow); + mptcp_close_ssk((struct sock *)msk, ssk, subflow); } } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 702dbfefa093..3081294dca6c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -539,8 +539,8 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, bool mptcp_subflow_data_available(struct sock *sk); void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); -void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, - struct mptcp_subflow_context *subflow); +void mptcp_close_ssk(struct sock *sk, struct sock *ssk, + struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); From 40947e13997a1cba4e875893ca6e5d5e61a0689d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:56 -0800 Subject: [PATCH 3/7] mptcp: schedule worker when subflow is closed When remote side closes a subflow we should schedule the worker to dispose of the subflow in a timely manner. Otherwise, SF_CLOSED event won't be generated until the mptcp socket itself is closing or local side is closing another subflow. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 4 ++++ net/mptcp/subflow.c | 25 +++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3fd8aef979a3..267c5521692d 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2170,6 +2170,10 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) if (inet_sk_state_load(ssk) != TCP_CLOSE) continue; + /* 'subflow_data_ready' will re-sched once rx queue is empty */ + if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) + continue; + mptcp_close_ssk((struct sock *)msk, ssk, subflow); } } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 280da418d60b..36b15726f851 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -953,6 +953,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, subflow->map_valid = 0; } +/* sched mptcp worker to remove the subflow if no more data is pending */ +static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk) +{ + struct sock *sk = (struct sock *)msk; + + if (likely(ssk->sk_state != TCP_CLOSE)) + return; + + if (skb_queue_empty(&ssk->sk_receive_queue) && + !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) { + sock_hold(sk); + if (!schedule_work(&msk->work)) + sock_put(sk); + } +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -991,11 +1007,11 @@ static bool subflow_check_data_avail(struct sock *ssk) } if (status != MAPPING_OK) - return false; + goto no_data; skb = skb_peek(&ssk->sk_receive_queue); if (WARN_ON_ONCE(!skb)) - return false; + goto no_data; /* if msk lacks the remote key, this subflow must provide an * MP_CAPABLE-based mapping @@ -1029,6 +1045,9 @@ static bool subflow_check_data_avail(struct sock *ssk) } return true; +no_data: + subflow_sched_work_if_closed(msk, ssk); + return false; fatal: /* fatal protocol error, close the socket */ /* This barrier is coupled with smp_rmb() in tcp_poll() */ @@ -1413,6 +1432,8 @@ static void subflow_state_change(struct sock *sk) if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + subflow_sched_work_if_closed(mptcp_sk(parent), sk); + if (__mptcp_check_fallback(mptcp_sk(parent)) && !subflow->rx_eof && subflow_is_done(sk)) { subflow->rx_eof = 1; From b263b0d7d60baecda3c840a0703bb6d511f7ae2d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:57 -0800 Subject: [PATCH 4/7] mptcp: move subflow close loop after sk close check In case mptcp socket is already dead the entire mptcp socket will be freed. We can avoid the close check in this case. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 267c5521692d..1b8be2bf6b43 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2246,9 +2246,6 @@ static void mptcp_worker(struct work_struct *work) mptcp_check_fastclose(msk); - if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) - __mptcp_close_subflow(msk); - if (msk->pm.status) mptcp_pm_nl_work(msk); @@ -2270,6 +2267,9 @@ static void mptcp_worker(struct work_struct *work) goto unlock; } + if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) + __mptcp_close_subflow(msk); + if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; From 6c714f1b547feb0402520357c91024375a4236f7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:58 -0800 Subject: [PATCH 5/7] mptcp: pass subflow socket to a few helpers Pass the first/initial subflow to the existing functions so they can pass this on to the notification handler that is added later in the series. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 2 +- net/mptcp/pm.c | 4 ++-- net/mptcp/protocol.c | 4 ++-- net/mptcp/protocol.h | 4 ++-- net/mptcp/subflow.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 3b71d68b3863..bb874c5d663a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -867,7 +867,7 @@ fully_established: clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk, subflow); } else { - mptcp_pm_fully_established(msk); + mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); } return true; diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 1a25003fd8e3..1dd0e9d7ed06 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -68,7 +68,7 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id) /* path manager event handlers */ -void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side) +void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) { struct mptcp_pm_data *pm = &msk->pm; @@ -119,7 +119,7 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, return true; } -void mptcp_pm_fully_established(struct mptcp_sock *msk) +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { struct mptcp_pm_data *pm = &msk->pm; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1b8be2bf6b43..56240b87d464 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3028,7 +3028,7 @@ void mptcp_finish_connect(struct sock *ssk) WRITE_ONCE(msk->can_ack, 1); WRITE_ONCE(msk->snd_una, msk->write_seq); - mptcp_pm_new_connection(msk, 0); + mptcp_pm_new_connection(msk, ssk, 0); mptcp_rcv_space_init(msk, ssk); } @@ -3272,7 +3272,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, list_add(&subflow->node, &msk->conn_list); sock_hold(msk->first); if (mptcp_is_fully_established(newsk)) - mptcp_pm_fully_established(msk); + mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL); mptcp_copy_inaddrs(newsk, msk->first); mptcp_rcv_space_init(msk, msk->first); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 3081294dca6c..f620e2f98d19 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -639,8 +639,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac); void __init mptcp_pm_init(void); void mptcp_pm_data_init(struct mptcp_sock *msk); -void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side); -void mptcp_pm_fully_established(struct mptcp_sock *msk); +void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); +void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); void mptcp_pm_connection_closed(struct mptcp_sock *msk); void mptcp_pm_subflow_established(struct mptcp_sock *msk, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 36b15726f851..ce2dea2a6e0a 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -675,7 +675,7 @@ create_child: * created mptcp socket */ new_msk->sk_destruct = mptcp_sock_destruct; - mptcp_pm_new_connection(mptcp_sk(new_msk), 1); + mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); ctx->conn = new_msk; new_msk = NULL; From 4d54cc32112d8d8b0667559c9309f1a6f764f70b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 15:59:59 -0800 Subject: [PATCH 6/7] mptcp: avoid lock_fast usage in accept path Once event support is added this may need to allocate memory while msk lock is held with softirqs disabled. Not using lock_fast also allows to do the allocation with GFP_KERNEL. Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/net/genetlink.h | 1 + net/mptcp/protocol.c | 5 ++--- net/netlink/genetlink.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index e55ec1597ce7..7cb3fa8310ed 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -14,6 +14,7 @@ */ struct genl_multicast_group { char name[GENL_NAMSIZ]; + u8 flags; }; struct genl_ops; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 56240b87d464..fe6da1b77723 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3260,9 +3260,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, struct mptcp_sock *msk = mptcp_sk(newsock->sk); struct mptcp_subflow_context *subflow; struct sock *newsk = newsock->sk; - bool slowpath; - slowpath = lock_sock_fast(newsk); + lock_sock(newsk); /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, @@ -3288,7 +3287,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssk->sk_socket) mptcp_sock_graft(ssk, newsock); } - unlock_sock_fast(newsk, slowpath); + release_sock(newsk); } if (inet_csk_listen_poll(ssock->sk)) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index c992424e4d63..2d6fdf40df66 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = { .netnsok = true, }; +static int genl_bind(struct net *net, int group) +{ + const struct genl_family *family; + unsigned int id; + int ret = 0; + + genl_lock_all(); + + idr_for_each_entry(&genl_fam_idr, family, id) { + const struct genl_multicast_group *grp; + int i; + + if (family->n_mcgrps == 0) + continue; + + i = group - family->mcgrp_offset; + if (i < 0 || i >= family->n_mcgrps) + continue; + + grp = &family->mcgrps[i]; + if ((grp->flags & GENL_UNS_ADMIN_PERM) && + !ns_capable(net->user_ns, CAP_NET_ADMIN)) + ret = -EPERM; + + break; + } + + genl_unlock_all(); + return ret; +} + static int __net_init genl_pernet_init(struct net *net) { struct netlink_kernel_cfg cfg = { .input = genl_rcv, .flags = NL_CFG_F_NONROOT_RECV, + .bind = genl_bind, }; /* we'll bump the group number right afterwards */ From b911c97c7dc771633c68ea9b8f15070f8af3d323 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 12 Feb 2021 16:00:01 -0800 Subject: [PATCH 7/7] mptcp: add netlink event support Allow userspace (mptcpd) to subscribe to mptcp genl multicast events. This implementation reuses the same event API as the mptcp kernel fork to ease integration of existing tools, e.g. mptcpd. Supported events include: 1. start and close of an mptcp connection 2. start and close of subflows (joins) 3. announce and withdrawals of addresses 4. subflow priority (backup/non-backup) change. Reviewed-by: Matthieu Baerts Signed-off-by: Florian Westphal Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/uapi/linux/mptcp.h | 74 +++++++++++ net/mptcp/pm.c | 20 ++- net/mptcp/pm_netlink.c | 261 ++++++++++++++++++++++++++++++++++++- net/mptcp/protocol.c | 10 +- net/mptcp/protocol.h | 6 + 5 files changed, 364 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 3674a451a18c..c91578aaab32 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -36,6 +36,7 @@ enum { /* netlink interface */ #define MPTCP_PM_NAME "mptcp_pm" #define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds" +#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events" #define MPTCP_PM_VER 0x1 /* @@ -104,4 +105,77 @@ struct mptcp_info { __u64 mptcpi_rcv_nxt; }; +/* + * MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A new MPTCP connection has been created. It is the good time to allocate + * memory and send ADD_ADDR if needed. Depending on the traffic-patterns + * it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent. + * + * MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport + * A MPTCP connection is established (can start new subflows). + * + * MPTCP_EVENT_CLOSED: token + * A MPTCP connection has stopped. + * + * MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport] + * A new address has been announced by the peer. + * + * MPTCP_EVENT_REMOVED: token, rem_id + * An address has been lost by the peer. + * + * MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6, + * daddr4 | daddr6, sport, dport, backup, + * if_idx [, error] + * A new subflow has been established. 'error' should not be set. + * + * MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * A subflow has been closed. An error (copy of sk_err) could be set if an + * error has been detected for this subflow. + * + * MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6, + * sport, dport, backup, if_idx [, error] + * The priority of a subflow has changed. 'error' should not be set. + */ +enum mptcp_event_type { + MPTCP_EVENT_UNSPEC = 0, + MPTCP_EVENT_CREATED = 1, + MPTCP_EVENT_ESTABLISHED = 2, + MPTCP_EVENT_CLOSED = 3, + + MPTCP_EVENT_ANNOUNCED = 6, + MPTCP_EVENT_REMOVED = 7, + + MPTCP_EVENT_SUB_ESTABLISHED = 10, + MPTCP_EVENT_SUB_CLOSED = 11, + + MPTCP_EVENT_SUB_PRIORITY = 13, +}; + +enum mptcp_event_attr { + MPTCP_ATTR_UNSPEC = 0, + + MPTCP_ATTR_TOKEN, /* u32 */ + MPTCP_ATTR_FAMILY, /* u16 */ + MPTCP_ATTR_LOC_ID, /* u8 */ + MPTCP_ATTR_REM_ID, /* u8 */ + MPTCP_ATTR_SADDR4, /* be32 */ + MPTCP_ATTR_SADDR6, /* struct in6_addr */ + MPTCP_ATTR_DADDR4, /* be32 */ + MPTCP_ATTR_DADDR6, /* struct in6_addr */ + MPTCP_ATTR_SPORT, /* be16 */ + MPTCP_ATTR_DPORT, /* be16 */ + MPTCP_ATTR_BACKUP, /* u8 */ + MPTCP_ATTR_ERROR, /* u8 */ + MPTCP_ATTR_FLAGS, /* u16 */ + MPTCP_ATTR_TIMEOUT, /* u32 */ + MPTCP_ATTR_IF_IDX, /* s32 */ + + __MPTCP_ATTR_AFTER_LAST +}; + +#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1) + #endif /* _UAPI_MPTCP_H */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 1dd0e9d7ed06..6fd4b2c1b076 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -75,6 +75,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); WRITE_ONCE(pm->server_side, server_side); + mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); } bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) @@ -122,13 +123,10 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) { struct mptcp_pm_data *pm = &msk->pm; + bool announce = false; pr_debug("msk=%p", msk); - /* try to avoid acquiring the lock below */ - if (!READ_ONCE(pm->work_pending)) - return; - spin_lock_bh(&pm->lock); /* mptcp_pm_fully_established() can be invoked by multiple @@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, if (READ_ONCE(pm->work_pending) && !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); + if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) + announce = true; + + msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); + + if (announce) + mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); } void mptcp_pm_connection_closed(struct mptcp_sock *msk) @@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, READ_ONCE(pm->accept_addr)); + mptcp_event_addr_announced(msk, addr); + spin_lock_bh(&pm->lock); if (!READ_ONCE(pm->accept_addr)) { @@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id) pr_debug("msk=%p remote_id=%d", msk, rm_id); + mptcp_event_addr_removed(msk, rm_id); + spin_lock_bh(&pm->lock); mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED); pm->rm_id = rm_id; @@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup) pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); subflow->backup = bkup; + + mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC); } /* path manager helpers */ diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index c3abff40fa4e..229fd1af2e29 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -860,10 +860,14 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk) WRITE_ONCE(pm->accept_subflow, subflows); } -#define MPTCP_PM_CMD_GRP_OFFSET 0 +#define MPTCP_PM_CMD_GRP_OFFSET 0 +#define MPTCP_PM_EV_GRP_OFFSET 1 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, + [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, + .flags = GENL_UNS_ADMIN_PERM, + }, }; static const struct nla_policy @@ -1482,6 +1486,261 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return 0; } +static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) +{ + genlmsg_multicast_netns(&mptcp_genl_family, net, + nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); +} + +static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) +{ + const struct inet_sock *issk = inet_sk(ssk); + const struct mptcp_subflow_context *sf; + + if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) + return -EMSGSIZE; + + switch (ssk->sk_family) { + case AF_INET: + if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) + return -EMSGSIZE; + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) + return -EMSGSIZE; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: { + const struct ipv6_pinfo *np = inet6_sk(ssk); + + if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) + return -EMSGSIZE; + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) + return -EMSGSIZE; + break; + } +#endif + default: + WARN_ON_ONCE(1); + return -EMSGSIZE; + } + + if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) + return -EMSGSIZE; + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) + return -EMSGSIZE; + + sf = mptcp_subflow_ctx(ssk); + if (WARN_ON_ONCE(!sf)) + return -EINVAL; + + if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) + return -EMSGSIZE; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + const struct sock *sk = (const struct sock *)msk; + const struct mptcp_subflow_context *sf; + u8 sk_err; + + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + return -EMSGSIZE; + + if (mptcp_event_add_subflow(skb, ssk)) + return -EMSGSIZE; + + sf = mptcp_subflow_ctx(ssk); + if (WARN_ON_ONCE(!sf)) + return -EINVAL; + + if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) + return -EMSGSIZE; + + if (ssk->sk_bound_dev_if && + nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) + return -EMSGSIZE; + + sk_err = ssk->sk_err; + if (sk_err && sk->sk_state == TCP_ESTABLISHED && + nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_sub_established(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + return mptcp_event_put_token_and_ssk(skb, msk, ssk); +} + +static int mptcp_event_sub_closed(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) + return -EMSGSIZE; + + return 0; +} + +static int mptcp_event_created(struct sk_buff *skb, + const struct mptcp_sock *msk, + const struct sock *ssk) +{ + int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); + + if (err) + return err; + + return mptcp_event_add_subflow(skb, ssk); +} + +void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); + if (!nlh) + goto nla_put_failure; + + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + goto nla_put_failure; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) + goto nla_put_failure; + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); + return; + +nla_put_failure: + kfree_skb(skb); +} + +void mptcp_event_addr_announced(const struct mptcp_sock *msk, + const struct mptcp_addr_info *info) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, + MPTCP_EVENT_ANNOUNCED); + if (!nlh) + goto nla_put_failure; + + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) + goto nla_put_failure; + + if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) + goto nla_put_failure; + + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) + goto nla_put_failure; + + switch (info->family) { + case AF_INET: + if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) + goto nla_put_failure; + break; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + case AF_INET6: + if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) + goto nla_put_failure; + break; +#endif + default: + WARN_ON_ONCE(1); + goto nla_put_failure; + } + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); + return; + +nla_put_failure: + kfree_skb(skb); +} + +void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, + const struct sock *ssk, gfp_t gfp) +{ + struct net *net = sock_net((const struct sock *)msk); + struct nlmsghdr *nlh; + struct sk_buff *skb; + + if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) + return; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!skb) + return; + + nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); + if (!nlh) + goto nla_put_failure; + + switch (type) { + case MPTCP_EVENT_UNSPEC: + WARN_ON_ONCE(1); + break; + case MPTCP_EVENT_CREATED: + case MPTCP_EVENT_ESTABLISHED: + if (mptcp_event_created(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_CLOSED: + if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_ANNOUNCED: + case MPTCP_EVENT_REMOVED: + /* call mptcp_event_addr_announced()/removed instead */ + WARN_ON_ONCE(1); + break; + case MPTCP_EVENT_SUB_ESTABLISHED: + case MPTCP_EVENT_SUB_PRIORITY: + if (mptcp_event_sub_established(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + case MPTCP_EVENT_SUB_CLOSED: + if (mptcp_event_sub_closed(skb, msk, ssk) < 0) + goto nla_put_failure; + break; + } + + genlmsg_end(skb, nlh); + mptcp_nl_mcast_send(net, skb, gfp); + return; + +nla_put_failure: + kfree_skb(skb); +} + static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index fe6da1b77723..c2a8392254dc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2150,6 +2150,8 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + if (sk->sk_state == TCP_ESTABLISHED) + mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL); __mptcp_close_ssk(sk, ssk, subflow); } @@ -2586,6 +2588,10 @@ cleanup: release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); + + if (mptcp_sk(sk)->token) + mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + sock_put(sk); } @@ -3057,7 +3063,7 @@ bool mptcp_finish_join(struct sock *ssk) return false; if (!msk->pm.server_side) - return true; + goto out; if (!mptcp_pm_allow_new_subflow(msk)) return false; @@ -3084,6 +3090,8 @@ bool mptcp_finish_join(struct sock *ssk) if (parent_sock && !ssk->sk_socket) mptcp_sock_graft(ssk, parent_sock); subflow->map_seq = READ_ONCE(msk->ack_seq); +out: + mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); return true; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f620e2f98d19..d31edbae8da8 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -10,6 +10,7 @@ #include #include #include +#include #define MPTCP_SUPPORTED_VERSION 1 @@ -666,6 +667,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id); int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id); +void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, + const struct sock *ssk, gfp_t gfp); +void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); +void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); + static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) { return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);