Merge branch 'mptcp-genl-events'

Mat Martineau says:

====================
mptcp: Add genl events for connection info

This series from the MPTCP tree adds genl multicast events that are
important for implementing a userspace path manager. In MPTCP, a path
manager is responsible for adding or removing additional subflows on
each MPTCP connection. The in-kernel path manager (already part of the
kernel) is a better fit for many server use cases, but the additional
flexibility of userspace path managers is often useful for client
devices.

Patches 1, 2, 4, 5, and 6 do some refactoring to streamline the netlink
event implementation in the final patch.

Patch 3 improves the timeliness of subflow destruction to ensure the
'subflow closed' event will be sent soon enough.

Patch 7 allows use of the GENL_UNS_ADMIN_PERM flag on genl mcast groups
to mandate CAP_NET_ADMIN, which is important to protect token information
in the MPTCP events. This is a genetlink change.

Patch 8 adds the MPTCP netlink events.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-02-12 16:31:46 -08:00
commit 0a2f6b32cc
9 changed files with 492 additions and 72 deletions

View File

@ -14,6 +14,7 @@
*/
struct genl_multicast_group {
char name[GENL_NAMSIZ];
u8 flags;
};
struct genl_ops;

View File

@ -36,6 +36,7 @@ enum {
/* netlink interface */
#define MPTCP_PM_NAME "mptcp_pm"
#define MPTCP_PM_CMD_GRP_NAME "mptcp_pm_cmds"
#define MPTCP_PM_EV_GRP_NAME "mptcp_pm_events"
#define MPTCP_PM_VER 0x1
/*
@ -104,4 +105,77 @@ struct mptcp_info {
__u64 mptcpi_rcv_nxt;
};
/*
* MPTCP_EVENT_CREATED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A new MPTCP connection has been created. It is the good time to allocate
* memory and send ADD_ADDR if needed. Depending on the traffic-patterns
* it can take a long time until the MPTCP_EVENT_ESTABLISHED is sent.
*
* MPTCP_EVENT_ESTABLISHED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport
* A MPTCP connection is established (can start new subflows).
*
* MPTCP_EVENT_CLOSED: token
* A MPTCP connection has stopped.
*
* MPTCP_EVENT_ANNOUNCED: token, rem_id, family, daddr4 | daddr6 [, dport]
* A new address has been announced by the peer.
*
* MPTCP_EVENT_REMOVED: token, rem_id
* An address has been lost by the peer.
*
* MPTCP_EVENT_SUB_ESTABLISHED: token, family, saddr4 | saddr6,
* daddr4 | daddr6, sport, dport, backup,
* if_idx [, error]
* A new subflow has been established. 'error' should not be set.
*
* MPTCP_EVENT_SUB_CLOSED: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* A subflow has been closed. An error (copy of sk_err) could be set if an
* error has been detected for this subflow.
*
* MPTCP_EVENT_SUB_PRIORITY: token, family, saddr4 | saddr6, daddr4 | daddr6,
* sport, dport, backup, if_idx [, error]
* The priority of a subflow has changed. 'error' should not be set.
*/
enum mptcp_event_type {
MPTCP_EVENT_UNSPEC = 0,
MPTCP_EVENT_CREATED = 1,
MPTCP_EVENT_ESTABLISHED = 2,
MPTCP_EVENT_CLOSED = 3,
MPTCP_EVENT_ANNOUNCED = 6,
MPTCP_EVENT_REMOVED = 7,
MPTCP_EVENT_SUB_ESTABLISHED = 10,
MPTCP_EVENT_SUB_CLOSED = 11,
MPTCP_EVENT_SUB_PRIORITY = 13,
};
enum mptcp_event_attr {
MPTCP_ATTR_UNSPEC = 0,
MPTCP_ATTR_TOKEN, /* u32 */
MPTCP_ATTR_FAMILY, /* u16 */
MPTCP_ATTR_LOC_ID, /* u8 */
MPTCP_ATTR_REM_ID, /* u8 */
MPTCP_ATTR_SADDR4, /* be32 */
MPTCP_ATTR_SADDR6, /* struct in6_addr */
MPTCP_ATTR_DADDR4, /* be32 */
MPTCP_ATTR_DADDR6, /* struct in6_addr */
MPTCP_ATTR_SPORT, /* be16 */
MPTCP_ATTR_DPORT, /* be16 */
MPTCP_ATTR_BACKUP, /* u8 */
MPTCP_ATTR_ERROR, /* u8 */
MPTCP_ATTR_FLAGS, /* u16 */
MPTCP_ATTR_TIMEOUT, /* u32 */
MPTCP_ATTR_IF_IDX, /* s32 */
__MPTCP_ATTR_AFTER_LAST
};
#define MPTCP_ATTR_MAX (__MPTCP_ATTR_AFTER_LAST - 1)
#endif /* _UAPI_MPTCP_H */

View File

@ -867,7 +867,7 @@ fully_established:
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
mptcp_pm_fully_established(msk);
mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC);
}
return true;

View File

@ -68,13 +68,14 @@ int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id)
/* path manager event handlers */
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side)
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
struct mptcp_pm_data *pm = &msk->pm;
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
WRITE_ONCE(pm->server_side, server_side);
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
@ -119,16 +120,13 @@ static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
return true;
}
void mptcp_pm_fully_established(struct mptcp_sock *msk)
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp)
{
struct mptcp_pm_data *pm = &msk->pm;
bool announce = false;
pr_debug("msk=%p", msk);
/* try to avoid acquiring the lock below */
if (!READ_ONCE(pm->work_pending))
return;
spin_lock_bh(&pm->lock);
/* mptcp_pm_fully_established() can be invoked by multiple
@ -138,9 +136,15 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk)
if (READ_ONCE(pm->work_pending) &&
!(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);
msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
announce = true;
msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
spin_unlock_bh(&pm->lock);
if (announce)
mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp);
}
void mptcp_pm_connection_closed(struct mptcp_sock *msk)
@ -179,6 +183,8 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk,
pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id,
READ_ONCE(pm->accept_addr));
mptcp_event_addr_announced(msk, addr);
spin_lock_bh(&pm->lock);
if (!READ_ONCE(pm->accept_addr)) {
@ -205,6 +211,8 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, u8 rm_id)
pr_debug("msk=%p remote_id=%d", msk, rm_id);
mptcp_event_addr_removed(msk, rm_id);
spin_lock_bh(&pm->lock);
mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED);
pm->rm_id = rm_id;
@ -217,6 +225,8 @@ void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup)
pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
subflow->backup = bkup;
mptcp_event(MPTCP_EVENT_SUB_PRIORITY, mptcp_sk(subflow->conn), sk, GFP_ATOMIC);
}
/* path manager helpers */

View File

@ -56,6 +56,8 @@ struct pm_nl_pernet {
#define MPTCP_PM_ADDR_MAX 8
#define ADD_ADDR_RETRANS_MAX 3
static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port)
{
@ -448,17 +450,17 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
}
}
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
{
mptcp_pm_create_subflow_or_signal_addr(msk);
}
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
@ -498,7 +500,7 @@ void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
mptcp_pm_nl_add_addr_send_ack(msk);
}
void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
static void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@ -568,7 +570,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
return -EINVAL;
}
void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
struct sock *sk = (struct sock *)msk;
@ -592,7 +594,7 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
__mptcp_close_ssk(sk, ssk, subflow);
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.add_addr_accepted--;
@ -605,6 +607,39 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk)
}
}
void mptcp_pm_nl_work(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
msk_owned_by_me(msk);
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
mptcp_pm_nl_add_addr_send_ack(msk);
}
if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
mptcp_pm_nl_rm_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
mptcp_pm_nl_fully_established(msk);
}
if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
mptcp_pm_nl_subflow_established(msk);
}
spin_unlock_bh(&msk->pm.lock);
}
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
{
struct mptcp_subflow_context *subflow, *tmp;
@ -629,7 +664,7 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id)
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
__mptcp_close_ssk(sk, ssk, subflow);
mptcp_close_ssk(sk, ssk, subflow);
spin_lock_bh(&msk->pm.lock);
msk->pm.local_addr_used--;
@ -825,10 +860,14 @@ void mptcp_pm_nl_data_init(struct mptcp_sock *msk)
WRITE_ONCE(pm->accept_subflow, subflows);
}
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_CMD_GRP_OFFSET 0
#define MPTCP_PM_EV_GRP_OFFSET 1
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
[MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
.flags = GENL_UNS_ADMIN_PERM,
},
};
static const struct nla_policy
@ -1447,6 +1486,261 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return 0;
}
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
{
genlmsg_multicast_netns(&mptcp_genl_family, net,
nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp);
}
static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
{
const struct inet_sock *issk = inet_sk(ssk);
const struct mptcp_subflow_context *sf;
if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family))
return -EMSGSIZE;
switch (ssk->sk_family) {
case AF_INET:
if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr))
return -EMSGSIZE;
if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr))
return -EMSGSIZE;
break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
case AF_INET6: {
const struct ipv6_pinfo *np = inet6_sk(ssk);
if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr))
return -EMSGSIZE;
if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr))
return -EMSGSIZE;
break;
}
#endif
default:
WARN_ON_ONCE(1);
return -EMSGSIZE;
}
if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport))
return -EMSGSIZE;
if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport))
return -EMSGSIZE;
sf = mptcp_subflow_ctx(ssk);
if (WARN_ON_ONCE(!sf))
return -EINVAL;
if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
return -EMSGSIZE;
return 0;
}
static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
const struct sock *sk = (const struct sock *)msk;
const struct mptcp_subflow_context *sf;
u8 sk_err;
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
return -EMSGSIZE;
if (mptcp_event_add_subflow(skb, ssk))
return -EMSGSIZE;
sf = mptcp_subflow_ctx(ssk);
if (WARN_ON_ONCE(!sf))
return -EINVAL;
if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup))
return -EMSGSIZE;
if (ssk->sk_bound_dev_if &&
nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
return -EMSGSIZE;
sk_err = ssk->sk_err;
if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
return -EMSGSIZE;
return 0;
}
static int mptcp_event_sub_established(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
return mptcp_event_put_token_and_ssk(skb, msk, ssk);
}
static int mptcp_event_sub_closed(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
if (mptcp_event_put_token_and_ssk(skb, msk, ssk))
return -EMSGSIZE;
return 0;
}
static int mptcp_event_created(struct sk_buff *skb,
const struct mptcp_sock *msk,
const struct sock *ssk)
{
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
if (err)
return err;
return mptcp_event_add_subflow(skb, ssk);
}
void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
{
struct net *net = sock_net((const struct sock *)msk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
return;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
return;
nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED);
if (!nlh)
goto nla_put_failure;
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
goto nla_put_failure;
genlmsg_end(skb, nlh);
mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
return;
nla_put_failure:
kfree_skb(skb);
}
void mptcp_event_addr_announced(const struct mptcp_sock *msk,
const struct mptcp_addr_info *info)
{
struct net *net = sock_net((const struct sock *)msk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
return;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
return;
nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0,
MPTCP_EVENT_ANNOUNCED);
if (!nlh)
goto nla_put_failure;
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
goto nla_put_failure;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
goto nla_put_failure;
if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port))
goto nla_put_failure;
switch (info->family) {
case AF_INET:
if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr))
goto nla_put_failure;
break;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
case AF_INET6:
if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6))
goto nla_put_failure;
break;
#endif
default:
WARN_ON_ONCE(1);
goto nla_put_failure;
}
genlmsg_end(skb, nlh);
mptcp_nl_mcast_send(net, skb, GFP_ATOMIC);
return;
nla_put_failure:
kfree_skb(skb);
}
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp)
{
struct net *net = sock_net((const struct sock *)msk);
struct nlmsghdr *nlh;
struct sk_buff *skb;
if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET))
return;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
if (!skb)
return;
nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type);
if (!nlh)
goto nla_put_failure;
switch (type) {
case MPTCP_EVENT_UNSPEC:
WARN_ON_ONCE(1);
break;
case MPTCP_EVENT_CREATED:
case MPTCP_EVENT_ESTABLISHED:
if (mptcp_event_created(skb, msk, ssk) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_CLOSED:
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_ANNOUNCED:
case MPTCP_EVENT_REMOVED:
/* call mptcp_event_addr_announced()/removed instead */
WARN_ON_ONCE(1);
break;
case MPTCP_EVENT_SUB_ESTABLISHED:
case MPTCP_EVENT_SUB_PRIORITY:
if (mptcp_event_sub_established(skb, msk, ssk) < 0)
goto nla_put_failure;
break;
case MPTCP_EVENT_SUB_CLOSED:
if (mptcp_event_sub_closed(skb, msk, ssk) < 0)
goto nla_put_failure;
break;
}
genlmsg_end(skb, nlh);
mptcp_nl_mcast_send(net, skb, gfp);
return;
nla_put_failure:
kfree_skb(skb);
}
static const struct genl_small_ops mptcp_pm_ops[] = {
{
.cmd = MPTCP_PM_CMD_ADD_ADDR,

View File

@ -2114,8 +2114,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
* so we need to use tcp_close() after detaching them from the mptcp
* parent socket.
*/
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
list_del(&subflow->node);
@ -2147,42 +2147,19 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
sock_put(ssk);
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
__mptcp_close_ssk(sk, ssk, subflow);
}
static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
{
return 0;
}
static void pm_work(struct mptcp_sock *msk)
{
struct mptcp_pm_data *pm = &msk->pm;
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED);
mptcp_pm_nl_add_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
mptcp_pm_nl_add_addr_send_ack(msk);
}
if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
mptcp_pm_nl_rm_addr_received(msk);
}
if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_ESTABLISHED);
mptcp_pm_nl_fully_established(msk);
}
if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) {
pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED);
mptcp_pm_nl_subflow_established(msk);
}
spin_unlock_bh(&msk->pm.lock);
}
static void __mptcp_close_subflow(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow, *tmp;
@ -2195,7 +2172,11 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
if (inet_sk_state_load(ssk) != TCP_CLOSE)
continue;
__mptcp_close_ssk((struct sock *)msk, ssk, subflow);
/* 'subflow_data_ready' will re-sched once rx queue is empty */
if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
continue;
mptcp_close_ssk((struct sock *)msk, ssk, subflow);
}
}
@ -2267,11 +2248,8 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
if (msk->pm.status)
pm_work(msk);
mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
mptcp_check_for_eof(msk);
@ -2291,6 +2269,9 @@ static void mptcp_worker(struct work_struct *work)
goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
@ -2607,6 +2588,10 @@ cleanup:
release_sock(sk);
if (do_cancel_work)
mptcp_cancel_work(sk);
if (mptcp_sk(sk)->token)
mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
sock_put(sk);
}
@ -3049,7 +3034,7 @@ void mptcp_finish_connect(struct sock *ssk)
WRITE_ONCE(msk->can_ack, 1);
WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, 0);
mptcp_pm_new_connection(msk, ssk, 0);
mptcp_rcv_space_init(msk, ssk);
}
@ -3078,7 +3063,7 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
if (!msk->pm.server_side)
return true;
goto out;
if (!mptcp_pm_allow_new_subflow(msk))
return false;
@ -3105,6 +3090,8 @@ bool mptcp_finish_join(struct sock *ssk)
if (parent_sock && !ssk->sk_socket)
mptcp_sock_graft(ssk, parent_sock);
subflow->map_seq = READ_ONCE(msk->ack_seq);
out:
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
@ -3281,9 +3268,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
struct mptcp_sock *msk = mptcp_sk(newsock->sk);
struct mptcp_subflow_context *subflow;
struct sock *newsk = newsock->sk;
bool slowpath;
slowpath = lock_sock_fast(newsk);
lock_sock(newsk);
/* PM/worker can now acquire the first subflow socket
* lock without racing with listener queue cleanup,
@ -3293,7 +3279,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
list_add(&subflow->node, &msk->conn_list);
sock_hold(msk->first);
if (mptcp_is_fully_established(newsk))
mptcp_pm_fully_established(msk);
mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
mptcp_copy_inaddrs(newsk, msk->first);
mptcp_rcv_space_init(msk, msk->first);
@ -3309,7 +3295,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk->sk_socket)
mptcp_sock_graft(ssk, newsock);
}
unlock_sock_fast(newsk, slowpath);
release_sock(newsk);
}
if (inet_csk_listen_poll(ssock->sk))

View File

@ -10,6 +10,7 @@
#include <linux/random.h>
#include <net/tcp.h>
#include <net/inet_connection_sock.h>
#include <uapi/linux/mptcp.h>
#define MPTCP_SUPPORTED_VERSION 1
@ -539,8 +540,8 @@ void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
@ -639,8 +640,8 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac);
void __init mptcp_pm_init(void);
void mptcp_pm_data_init(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp);
bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);
void mptcp_pm_connection_closed(struct mptcp_sock *msk);
void mptcp_pm_subflow_established(struct mptcp_sock *msk,
@ -666,6 +667,11 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
int mptcp_pm_remove_addr(struct mptcp_sock *msk, u8 local_id);
int mptcp_pm_remove_subflow(struct mptcp_sock *msk, u8 local_id);
void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
const struct sock *ssk, gfp_t gfp);
void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info);
void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id);
static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk)
{
return READ_ONCE(msk->pm.addr_signal) & BIT(MPTCP_ADD_ADDR_SIGNAL);
@ -713,11 +719,7 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_data_init(struct mptcp_sock *msk);
void mptcp_pm_nl_fully_established(struct mptcp_sock *msk);
void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk);
void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(struct mptcp_sock *msk);

View File

@ -675,7 +675,7 @@ create_child:
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
@ -953,6 +953,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
subflow->map_valid = 0;
}
/* sched mptcp worker to remove the subflow if no more data is pending */
static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
if (likely(ssk->sk_state != TCP_CLOSE))
return;
if (skb_queue_empty(&ssk->sk_receive_queue) &&
!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
sock_hold(sk);
if (!schedule_work(&msk->work))
sock_put(sk);
}
}
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@ -991,11 +1007,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (status != MAPPING_OK)
return false;
goto no_data;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
return false;
goto no_data;
/* if msk lacks the remote key, this subflow must provide an
* MP_CAPABLE-based mapping
@ -1029,6 +1045,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
return true;
no_data:
subflow_sched_work_if_closed(msk, ssk);
return false;
fatal:
/* fatal protocol error, close the socket */
/* This barrier is coupled with smp_rmb() in tcp_poll() */
@ -1413,6 +1432,8 @@ static void subflow_state_change(struct sock *sk)
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;

View File

@ -1360,11 +1360,43 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
static int genl_bind(struct net *net, int group)
{
const struct genl_family *family;
unsigned int id;
int ret = 0;
genl_lock_all();
idr_for_each_entry(&genl_fam_idr, family, id) {
const struct genl_multicast_group *grp;
int i;
if (family->n_mcgrps == 0)
continue;
i = group - family->mcgrp_offset;
if (i < 0 || i >= family->n_mcgrps)
continue;
grp = &family->mcgrps[i];
if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
break;
}
genl_unlock_all();
return ret;
}
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
};
/* we'll bump the group number right afterwards */