From d58c35ca5202edea02d8201f4acd81e06c98f9b4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Jan 2020 13:53:44 +0200 Subject: [PATCH 1/6] mlxsw: spectrum: Do not enforce same firmware version for multiple ASICs In commit a72afb6879bb ("mlxsw: Enforce firmware version for Spectrum-2") I added a required firmware version for Spectrum-2, but missed the fact that mlxsw_sp2_init() is used by both Spectrum-2 and Spectrum-3. This means that the same firmware version will be used for both, which is wrong. Fix this by creating a new init() callback for Spectrum-3. Fixes: a72afb6879bb ("mlxsw: Enforce firmware version for Spectrum-2") Signed-off-by: Ido Schimmel Acked-by: Jiri Pirko Tested-by: Shalom Toledo Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index f7fd5e8fbf96..5408a964bd10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5132,6 +5132,27 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } +static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, + const struct mlxsw_bus_info *mlxsw_bus_info, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + mlxsw_sp->kvdl_ops = &mlxsw_sp2_kvdl_ops; + mlxsw_sp->afa_ops = &mlxsw_sp2_act_afa_ops; + mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; + mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; + mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; + mlxsw_sp->mac_mask = mlxsw_sp2_mac_mask; + mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; + mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; + mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; + + return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); +} + static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); @@ -5634,7 +5655,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = { static struct mlxsw_driver mlxsw_sp3_driver = { .kind = mlxsw_sp3_driver_name, .priv_size = sizeof(struct mlxsw_sp), - .init = mlxsw_sp2_init, + .init = mlxsw_sp3_init, .fini = mlxsw_sp_fini, .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set, .port_split = mlxsw_sp_port_split, From 2da51ce75d86ab1f7770ac1391a9a1697ddaa60c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Jan 2020 13:53:45 +0200 Subject: [PATCH 2/6] mlxsw: spectrum: Do not modify cloned SKBs during xmit The driver needs to prepend a Tx header to each packet it is transmitting. The header includes information such as the egress port and traffic class. The addition of the header requires the driver to modify the SKB's header and therefore it must not be shared. Otherwise, we risk hitting various race conditions. For example, when a packet is flooded (cloned) by the bridge driver to two switch ports swp1 and swp2: t0 - mlxsw_sp_port_xmit() is called for swp1. Tx header is prepended with swp1's port number t1 - mlxsw_sp_port_xmit() is called for swp2. Tx header is prepended with swp2's port number, overwriting swp1's port number t2 - The device processes data buffer from t0. Packet is transmitted via swp2 t3 - The device processes data buffer from t1. Packet is transmitted via swp2 Usually, the device is fast enough and transmits the packet before its Tx header is overwritten, but this is not the case in emulated environments. Fix this by making sure the SKB's header is writable by calling skb_cow_head(). Since the function ensures we have headroom to push the Tx header, the check further in the function can be removed. v2: * Use skb_cow_head() instead of skb_unshare() as suggested by Jakub * Remove unnecessary check regarding headroom Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Ido Schimmel Reported-by: Shalom Toledo Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5408a964bd10..2394c425b47d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -860,23 +860,17 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; - if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { - struct sk_buff *skb_orig = skb; - - skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); - if (!skb) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb_orig); - return NETDEV_TX_OK; - } - dev_consume_skb_any(skb_orig); - } - if (eth_skb_pad(skb)) { this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); return NETDEV_TX_OK; From 63963d0f9d17be83d0e419e03282847ecc2c3715 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Jan 2020 13:53:46 +0200 Subject: [PATCH 3/6] mlxsw: switchx2: Do not modify cloned SKBs during xmit The driver needs to prepend a Tx header to each packet it is transmitting. The header includes information such as the egress port and traffic class. The addition of the header requires the driver to modify the SKB's header and therefore it must not be shared. Otherwise, we risk hitting various race conditions. For example, when a packet is flooded (cloned) by the bridge driver to two switch ports swp1 and swp2: t0 - mlxsw_sp_port_xmit() is called for swp1. Tx header is prepended with swp1's port number t1 - mlxsw_sp_port_xmit() is called for swp2. Tx header is prepended with swp2's port number, overwriting swp1's port number t2 - The device processes data buffer from t0. Packet is transmitted via swp2 t3 - The device processes data buffer from t1. Packet is transmitted via swp2 Usually, the device is fast enough and transmits the packet before its Tx header is overwritten, but this is not the case in emulated environments. Fix this by making sure the SKB's header is writable by calling skb_cow_head(). Since the function ensures we have headroom to push the Tx header, the check further in the function can be removed. v2: * Use skb_cow_head() instead of skb_unshare() as suggested by Jakub * Remove unnecessary check regarding headroom Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") Signed-off-by: Ido Schimmel Reported-by: Shalom Toledo Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index de6cb22f68b1..f0e98ec8f1ee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -299,22 +299,17 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { + this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; - if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { - struct sk_buff *skb_orig = skb; - - skb = skb_realloc_headroom(skb, MLXSW_TXHDR_LEN); - if (!skb) { - this_cpu_inc(mlxsw_sx_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb_orig); - return NETDEV_TX_OK; - } - dev_consume_skb_any(skb_orig); - } mlxsw_sx_txhdr_construct(skb, &tx_info); /* TX header is consumed by HW on the way so we shouldn't count its * bytes as being sent. From fef6d6704944c7be72fd2b77c021f1aed3d5df0d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 15 Jan 2020 13:53:47 +0200 Subject: [PATCH 4/6] selftests: mlxsw: qos_mc_aware: Fix mausezahn invocation Mausezahn does not recognize "own" as a keyword on source IP address. As a result, the MC stream is not running at all, and therefore no UC degradation can be observed even in principle. Fix the invocation, and tighten the test: due to the minimum shaper configured at the MC TCs, we always expect about 20% degradation. Fail the test if it is lower. Fixes: 573363a68f27 ("selftests: mlxsw: Add qos_lib.sh") Signed-off-by: Petr Machata Reported-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 47315fe48d5a..24dd8ed48580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -232,7 +232,7 @@ test_mc_aware() stop_traffic local ucth1=${uc_rate[1]} - start_traffic $h1 own bc bc + start_traffic $h1 192.0.2.65 bc bc local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) @@ -254,7 +254,11 @@ test_mc_aware() ret = 100 * ($ucth1 - $ucth2) / $ucth1 if (ret > 0) { ret } else { 0 } ") - check_err $(bc <<< "$deg > 25") + + # Minimum shaper of 200Mbps on MC TCs should cause about 20% of + # degradation on 1Gbps link. + check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect" + check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much" local interval=$((d1 - d0)) local mc_ir=$(rate $u0 $u1 $interval) From ca7609ff3680c51d6c29897f3117aa2ad904f92a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 15 Jan 2020 13:53:48 +0200 Subject: [PATCH 5/6] mlxsw: spectrum: Wipe xstats.backlog of down ports Per-port counter cache used by Qdiscs is updated periodically, unless the port is down. The fact that the cache is not updated for down ports is no problem for most counters, which are relative in nature. However, backlog is absolute in nature, and if there is a non-zero value in the cache around the time that the port goes down, that value just stays there. This value then leaks to offloaded Qdiscs that report non-zero backlog even if there (obviously) is no traffic. The HW does not keep backlog of a downed port, so do likewise: as the port goes down, wipe the backlog value from xstats. Fixes: 075ab8adaf4e ("mlxsw: spectrum: Collect tclass related stats periodically") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 2394c425b47d..8ed15199eb4f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1209,6 +1209,9 @@ static void update_stats_cache(struct work_struct *work) periodic_hw_stats.update_dw.work); if (!netif_carrier_ok(mlxsw_sp_port->dev)) + /* Note: mlxsw_sp_port_down_wipe_counters() clears the cache as + * necessary when port goes down. + */ goto out; mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, @@ -4318,6 +4321,15 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port, return 0; } +static void +mlxsw_sp_port_down_wipe_counters(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + for (i = 0; i < TC_MAX_QUEUE; i++) + mlxsw_sp_port->periodic_hw_stats.xstats.backlog[i] = 0; +} + static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, char *pude_pl, void *priv) { @@ -4339,6 +4351,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); + mlxsw_sp_port_down_wipe_counters(mlxsw_sp_port); } } From 85005b82e59fa7bb7388b12594ab2067bf73d66c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 15 Jan 2020 13:53:49 +0200 Subject: [PATCH 6/6] mlxsw: spectrum_qdisc: Include MC TCs in Qdisc counters mlxsw configures Spectrum in such a way that BUM traffic is passed not through its nominal traffic class TC, but through its MC counterpart TC+8. However, when collecting statistics, Qdiscs only look at the nominal TC and ignore the MC TC. Add two helpers to compute the value for logical TC from the constituents, one for backlog, the other for tail drops. Use them throughout instead of going through the xstats pointer directly. Counters for TX bytes and packets are deduced from packet priority counters, and therefore already include BUM traffic. wred_drop counter is irrelevant on MC TCs, because RED is not enabled on them. Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports") Signed-off-by: Petr Machata Acked-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 46d43cfd04e9..0124bfe1963b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -195,6 +195,20 @@ mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, return -EOPNOTSUPP; } +static u64 +mlxsw_sp_xstats_backlog(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->backlog[tclass_num] + + xstats->backlog[tclass_num + 8]; +} + +static u64 +mlxsw_sp_xstats_tail_drop(struct mlxsw_sp_port_xstats *xstats, int tclass_num) +{ + return xstats->tail_drop[tclass_num] + + xstats->tail_drop[tclass_num + 8]; +} + static void mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, u8 prio_bitmap, u64 *tx_packets, @@ -269,7 +283,7 @@ mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, &stats_base->tx_bytes); red_base->prob_mark = xstats->ecn; red_base->prob_drop = xstats->wred_drop[tclass_num]; - red_base->pdrop = xstats->tail_drop[tclass_num]; + red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num); stats_base->overlimits = red_base->prob_drop + red_base->prob_mark; stats_base->drops = red_base->prob_drop + red_base->pdrop; @@ -370,7 +384,8 @@ mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; marks = xstats->ecn - xstats_base->prob_mark; - pdrops = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - + xstats_base->pdrop; res->pdrop += pdrops; res->prob_drop += early_drops; @@ -403,9 +418,10 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; - drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - + drops = xstats->wred_drop[tclass_num] + + mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - stats_base->drops; - backlog = xstats->backlog[tclass_num]; + backlog = mlxsw_sp_xstats_backlog(xstats, tclass_num); _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); stats_ptr->qstats->overlimits += overlimits; @@ -576,9 +592,9 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, tx_packets = stats->tx_packets - stats_base->tx_packets; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - drops += xstats->tail_drop[i]; + drops += mlxsw_sp_xstats_tail_drop(xstats, i); drops += xstats->wred_drop[i]; - backlog += xstats->backlog[i]; + backlog += mlxsw_sp_xstats_backlog(xstats, i); } drops = drops - stats_base->drops; @@ -614,7 +630,7 @@ mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, stats_base->drops = 0; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - stats_base->drops += xstats->tail_drop[i]; + stats_base->drops += mlxsw_sp_xstats_tail_drop(xstats, i); stats_base->drops += xstats->wred_drop[i]; }