From 06870682087b58398671e8cdc896cd62314c4399 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 17 Dec 2019 16:20:42 +0000 Subject: [PATCH 1/4] xsk: Add rcu_read_lock around the XSK wakeup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The XSK wakeup callback in drivers makes some sanity checks before triggering NAPI. However, some configuration changes may occur during this function that affect the result of those checks. For example, the interface can go down, and all the resources will be destroyed after the checks in the wakeup function, but before it attempts to use these resources. Wrap this callback in rcu_read_lock to allow driver to synchronize_rcu before actually destroying the resources. xsk_wakeup is a new function that encapsulates calling ndo_xsk_wakeup wrapped into the RCU lock. After this commit, xsk_poll starts using xsk_wakeup and checks xs->zc instead of ndo_xsk_wakeup != NULL to decide ndo_xsk_wakeup should be called. It also fixes a bug introduced with the need_wakeup feature: a non-zero-copy socket may be used with a driver supporting zero-copy, and in this case ndo_xsk_wakeup should not be called, so the xs->zc check is the correct one. Fixes: 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191217162023.16011-2-maximmi@mellanox.com --- net/xdp/xsk.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 956793893c9d..328f661b83b2 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -334,12 +334,21 @@ out: } EXPORT_SYMBOL(xsk_umem_consume_tx); -static int xsk_zc_xmit(struct xdp_sock *xs) +static int xsk_wakeup(struct xdp_sock *xs, u8 flags) { struct net_device *dev = xs->dev; + int err; - return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, - XDP_WAKEUP_TX); + rcu_read_lock(); + err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags); + rcu_read_unlock(); + + return err; +} + +static int xsk_zc_xmit(struct xdp_sock *xs) +{ + return xsk_wakeup(xs, XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -453,19 +462,16 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - struct net_device *dev; struct xdp_umem *umem; if (unlikely(!xsk_is_bound(xs))) return mask; - dev = xs->dev; umem = xs->umem; if (umem->need_wakeup) { - if (dev->netdev_ops->ndo_xsk_wakeup) - dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, - umem->need_wakeup); + if (xs->zc) + xsk_wakeup(xs, umem->need_wakeup); else /* Poll needs to drive Tx also in copy mode */ __xsk_sendmsg(sk); From 9cf88808ad6a0f1e958e00abd9a081295fe6da0c Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 17 Dec 2019 16:20:44 +0000 Subject: [PATCH 2/4] net/mlx5e: Fix concurrency issues between config flow and XSK After disabling resources necessary for XSK (the XDP program, channels, XSK queues), use synchronize_rcu to wait until the XSK wakeup function finishes, before freeing the resources. Suspend XSK wakeups during switching channels. If the XDP program is being removed, synchronize_rcu before closing the old channels to allow XSK wakeup to complete. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191217162023.16011-3-maximmi@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- .../net/ethernet/mellanox/mlx5/core/en/xdp.h | 22 ++++++++----------- .../mellanox/mlx5/core/en/xsk/setup.c | 1 + .../ethernet/mellanox/mlx5/core/en/xsk/tx.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 +--------------- 5 files changed, 13 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2c16add0b642..9c8427698238 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -760,7 +760,7 @@ enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, - MLX5E_STATE_XDP_OPEN, + MLX5E_STATE_XDP_ACTIVE, }; struct mlx5e_rqt { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 36ac1e3816b9..d7587f40ecae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -75,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); + + if (priv->channels.params.xdp_prog) + set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); } static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) { + if (priv->channels.params.xdp_prog) + clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); + clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); - /* let other device's napi(s) see our new state */ + /* Let other device's napi(s) and XSK wakeups see our new state. */ synchronize_rcu(); } @@ -89,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } -static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); -} - -static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) -{ - clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); -} - -static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) -{ - return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); + return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state); } static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 631af8dee517..c28cbae42331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -144,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); napi_synchronize(&c->napi); + synchronize_rcu(); /* Sync with the XSK wakeup. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c index 87827477d38c..fe2d596cb361 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -14,7 +14,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) struct mlx5e_channel *c; u16 ix; - if (unlikely(!mlx5e_xdp_is_open(priv))) + if (unlikely(!mlx5e_xdp_is_active(priv))) return -ENETDOWN; if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4980e80a5e85..4997b8a51994 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3000,12 +3000,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); - if (is_xdp) - mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) @@ -3020,8 +3017,6 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: - if (is_xdp) - mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -3053,8 +3048,6 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - if (priv->channels.params.xdp_prog) - mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -4371,16 +4364,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return 0; } -static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) -{ - if (priv->channels.params.xdp_prog) - mlx5e_xdp_set_open(priv); - else - mlx5e_xdp_set_closed(priv); - - return 0; -} - static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4415,7 +4398,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) mlx5e_set_rq_type(priv->mdev, &new_channels.params); old_prog = priv->channels.params.xdp_prog; - err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL); if (err) goto unlock; } else { From b3873a5be757b44d51af542a50a6f2a3b6f95284 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 17 Dec 2019 16:20:45 +0000 Subject: [PATCH 3/4] net/i40e: Fix concurrency issues between config flow and XSK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use synchronize_rcu to wait until the XSK wakeup function finishes before destroying the resources it uses: 1. i40e_down already calls synchronize_rcu. On i40e_down either __I40E_VSI_DOWN or __I40E_CONFIG_BUSY is set. Check the latter in i40e_xsk_wakeup (the former is already checked there). 2. After switching the XDP program, call synchronize_rcu to let i40e_xsk_wakeup exit before the XDP program is freed. 3. Changing the number of channels brings the interface down (see i40e_prep_for_reset and i40e_pf_quiesce_all_vsi). 4. Disabling UMEM sets __I40E_CONFIG_BUSY, too. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191217162023.16011-4-maximmi@mellanox.com --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 10 +++++++--- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 4 ++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index cb6367334ca7..4833187bd259 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1152,7 +1152,7 @@ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags); static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi) { - return !!vsi->xdp_prog; + return !!READ_ONCE(vsi->xdp_prog); } int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1ccabeafa44c..2c5af6d4a6b1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -6823,8 +6823,8 @@ void i40e_down(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_queue_pairs; i++) { i40e_clean_tx_ring(vsi->tx_rings[i]); if (i40e_enabled_xdp_vsi(vsi)) { - /* Make sure that in-progress ndo_xdp_xmit - * calls are completed. + /* Make sure that in-progress ndo_xdp_xmit and + * ndo_xsk_wakeup calls are completed. */ synchronize_rcu(); i40e_clean_tx_ring(vsi->xdp_rings[i]); @@ -12546,8 +12546,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, old_prog = xchg(&vsi->xdp_prog, prog); - if (need_reset) + if (need_reset) { + if (!prog) + /* Wait until ndo_xsk_wakeup completes. */ + synchronize_rcu(); i40e_reset_and_rebuild(pf, true, true); + } for (i = 0; i < vsi->num_queue_pairs; i++) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index d07e1a890428..f73cd917c44f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -787,8 +787,12 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) { struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; struct i40e_ring *ring; + if (test_bit(__I40E_CONFIG_BUSY, pf->state)) + return -ENETDOWN; + if (test_bit(__I40E_VSI_DOWN, vsi->state)) return -ENETDOWN; From c0fdccfd226a1424683d3000d9e08384391210a2 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 17 Dec 2019 16:20:47 +0000 Subject: [PATCH 4/4] net/ixgbe: Fix concurrency issues between config flow and XSK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use synchronize_rcu to wait until the XSK wakeup function finishes before destroying the resources it uses: 1. ixgbe_down already calls synchronize_rcu after setting __IXGBE_DOWN. 2. After switching the XDP program, call synchronize_rcu to let ixgbe_xsk_wakeup exit before the XDP program is freed. 3. Changing the number of channels brings the interface down. 4. Disabling UMEM sets __IXGBE_TX_DISABLED before closing hardware resources and resetting xsk_umem. Check that bit in ixgbe_xsk_wakeup to avoid using the XDP ring when it's already destroyed. synchronize_rcu is called from ixgbe_txrx_ring_disable. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191217162023.16011-5-maximmi@mellanox.com --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 ++++++- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 25c097cd8100..82a30b597cf9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10261,7 +10261,12 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) /* If transitioning XDP modes reconfigure rings */ if (need_reset) { - int err = ixgbe_setup_tc(dev, adapter->hw_tcs); + int err; + + if (!prog) + /* Wait until ndo_xsk_wakeup completes. */ + synchronize_rcu(); + err = ixgbe_setup_tc(dev, adapter->hw_tcs); if (err) { rcu_assign_pointer(adapter->xdp_prog, old_prog); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index d6feaacfbf89..b43be9f14105 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -709,10 +709,14 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) if (qid >= adapter->num_xdp_queues) return -ENXIO; - if (!adapter->xdp_ring[qid]->xsk_umem) + ring = adapter->xdp_ring[qid]; + + if (test_bit(__IXGBE_TX_DISABLED, &ring->state)) + return -ENETDOWN; + + if (!ring->xsk_umem) return -ENXIO; - ring = adapter->xdp_ring[qid]; if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) { u64 eics = BIT_ULL(ring->q_vector->v_idx);