From c2070152747ebc294f018e4b28bf2d9f719a6b36 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:05 +0200 Subject: [PATCH 01/14] net: sched: sch_tbf: Don't overwrite backlog before dumping In 2011, in commit b0460e4484f9 ("sch_tbf: report backlog information"), TBF started copying backlog depth from the child Qdisc before dumping, with the motivation that the backlog was otherwise not visible in "tc -s qdisc show". Later, in 2016, in commit 8d5958f424b6 ("sch_tbf: update backlog as well"), TBF got a full-blown backlog tracking. However it kept copying the child's backlog over before dumping. That line is now unnecessary, so remove it. As shown in the following example, backlog is still reported correctly: # tc -s qdisc show dev veth0 invisible qdisc tbf 1: root refcnt 2 rate 1Mbit burst 128Kb lat 82.8s Sent 505475370 bytes 406985 pkt (dropped 0, overlimits 812544 requeues 0) backlog 81972b 66p requeues 0 qdisc bfifo 0: parent 1:1 limit 10Mb Sent 505475370 bytes 406985 pkt (dropped 0, overlimits 0 requeues 0) backlog 81972b 66p requeues 0 Signed-off-by: Petr Machata <petrm@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/sch_tbf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 2cd94973795c..7ae317958090 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -441,7 +441,6 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest; struct tc_tbf_qopt opt; - sch->qstats.backlog = q->qdisc->qstats.backlog; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; From ef6aadcc76c97e25f62adc4e9d19684d3e5d0b87 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:06 +0200 Subject: [PATCH 02/14] net: sched: Make TBF Qdisc offloadable Invoke ndo_setup_tc as appropriate to signal init / replacement, destroying and dumping of TBF Qdisc. Signed-off-by: Petr Machata <petrm@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/netdevice.h | 1 + include/net/pkt_cls.h | 22 ++++++++++++++++ net/sched/sch_tbf.c | 55 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ec3537fbdb1..11bdf6cb30bd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -850,6 +850,7 @@ enum tc_setup_type { TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, TC_SETUP_QDISC_ETS, + TC_SETUP_QDISC_TBF, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 47b115e2012a..ce036492986a 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -854,4 +854,26 @@ struct tc_ets_qopt_offload { }; }; +enum tc_tbf_command { + TC_TBF_REPLACE, + TC_TBF_DESTROY, + TC_TBF_STATS, +}; + +struct tc_tbf_qopt_offload_replace_params { + struct psched_ratecfg rate; + u32 max_size; + struct gnet_stats_queue *qstats; +}; + +struct tc_tbf_qopt_offload { + enum tc_tbf_command command; + u32 handle; + u32 parent; + union { + struct tc_tbf_qopt_offload_replace_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; + #endif diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 7ae317958090..78e79029dc63 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -15,6 +15,7 @@ #include <linux/skbuff.h> #include <net/netlink.h> #include <net/sch_generic.h> +#include <net/pkt_cls.h> #include <net/pkt_sched.h> @@ -137,6 +138,52 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, return len; } +static void tbf_offload_change(struct Qdisc *sch) +{ + struct tbf_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_tbf_qopt_offload qopt; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_TBF_REPLACE; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.replace_params.rate = q->rate; + qopt.replace_params.max_size = q->max_size; + qopt.replace_params.qstats = &sch->qstats; + + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); +} + +static void tbf_offload_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_tbf_qopt_offload qopt; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return; + + qopt.command = TC_TBF_DESTROY; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TBF, &qopt); +} + +static int tbf_offload_dump(struct Qdisc *sch) +{ + struct tc_tbf_qopt_offload qopt; + + qopt.command = TC_TBF_STATS; + qopt.handle = sch->handle; + qopt.parent = sch->parent; + qopt.stats.bstats = &sch->bstats; + qopt.stats.qstats = &sch->qstats; + + return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_TBF, &qopt); +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -407,6 +454,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_unlock(sch); err = 0; + + tbf_offload_change(sch); done: return err; } @@ -432,6 +481,7 @@ static void tbf_destroy(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); + tbf_offload_destroy(sch); qdisc_put(q->qdisc); } @@ -440,6 +490,11 @@ static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *nest; struct tc_tbf_qopt opt; + int err; + + err = tbf_offload_dump(sch); + if (err) + return err; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) From cf9af379cd672d6873ae97ec416148708c6575e9 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:07 +0200 Subject: [PATCH 03/14] mlxsw: spectrum_qdisc: Extract a per-TC stat function Extract from mlxsw_sp_qdisc_get_prio_stats() two new functions: mlxsw_sp_qdisc_collect_tc_stats() to accumulate stats for that one TC only, and mlxsw_sp_qdisc_update_stats() that makes the stats relative to base values stored earlier. Use them from mlxsw_sp_qdisc_get_red_stats(). Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 119 ++++++++++-------- 1 file changed, 70 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index d57c9b15f45e..ef63b8cbbc94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -227,6 +227,52 @@ mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats, } } +static void +mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 *p_tx_bytes, u64 *p_tx_packets, + u64 *p_drops, u64 *p_backlog) +{ + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + struct mlxsw_sp_port_xstats *xstats; + u64 tx_bytes, tx_packets; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + mlxsw_sp_qdisc_bstats_per_priority_get(xstats, + mlxsw_sp_qdisc->prio_bitmap, + &tx_packets, &tx_bytes); + + *p_tx_packets += tx_packets; + *p_tx_bytes += tx_bytes; + *p_drops += xstats->wred_drop[tclass_num] + + mlxsw_sp_xstats_tail_drop(xstats, tclass_num); + *p_backlog += mlxsw_sp_xstats_backlog(xstats, tclass_num); +} + +static void +mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + u64 tx_bytes, u64 tx_packets, + u64 drops, u64 backlog, + struct tc_qopt_offload_stats *stats_ptr) +{ + struct mlxsw_sp_qdisc_stats *stats_base = &mlxsw_sp_qdisc->stats_base; + + tx_bytes -= stats_base->tx_bytes; + tx_packets -= stats_base->tx_packets; + drops -= stats_base->drops; + backlog -= stats_base->backlog; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog); + + stats_base->backlog += backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -403,41 +449,30 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, overlimits, drops, backlog; u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 overlimits; + u64 drops = 0; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; - mlxsw_sp_qdisc_bstats_per_priority_get(xstats, - mlxsw_sp_qdisc->prio_bitmap, - &tx_packets, &tx_bytes); - tx_bytes = tx_bytes - stats_base->tx_bytes; - tx_packets = tx_packets - stats_base->tx_packets; - + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; - drops = xstats->wred_drop[tclass_num] + - mlxsw_sp_xstats_tail_drop(xstats, tclass_num) - - stats_base->drops; - backlog = mlxsw_sp_xstats_backlog(xstats, tclass_num); - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); stats_ptr->qstats->overlimits += overlimits; - stats_ptr->qstats->drops += drops; - stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - - stats_base->backlog = backlog; - stats_base->drops += drops; stats_base->overlimits += overlimits; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + return 0; } @@ -628,37 +663,23 @@ mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct tc_qopt_offload_stats *stats_ptr) { - u64 tx_bytes, tx_packets, drops = 0, backlog = 0; - struct mlxsw_sp_qdisc_stats *stats_base; - struct mlxsw_sp_port_xstats *xstats; - struct rtnl_link_stats64 *stats; + struct mlxsw_sp_qdisc *tc_qdisc; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; int i; - xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; - stats = &mlxsw_sp_port->periodic_hw_stats.stats; - stats_base = &mlxsw_sp_qdisc->stats_base; - - tx_bytes = stats->tx_bytes - stats_base->tx_bytes; - tx_packets = stats->tx_packets - stats_base->tx_packets; - for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - drops += mlxsw_sp_xstats_tail_drop(xstats, i); - drops += xstats->wred_drop[i]; - backlog += mlxsw_sp_xstats_backlog(xstats, i); + tc_qdisc = &mlxsw_sp_port->tclass_qdiscs[i]; + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); } - drops = drops - stats_base->drops; - _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); - stats_ptr->qstats->drops += drops; - stats_ptr->qstats->backlog += - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - backlog) - - mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - stats_base->backlog); - stats_base->backlog = backlog; - stats_base->drops += drops; - stats_base->tx_bytes += tx_bytes; - stats_base->tx_packets += tx_packets; + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); return 0; } From 3d0d592193dce954bf5ccac5128080830dd8e982 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:08 +0200 Subject: [PATCH 04/14] mlxsw: spectrum_qdisc: Add mlxsw_sp_qdisc_get_class_stats() Add a wrapper around mlxsw_sp_qdisc_collect_tc_stats() and mlxsw_sp_qdisc_update_stats() for the simple case of doing both in one go: mlxsw_sp_qdisc_get_class_stats(). Dispatch to that function from mlxsw_sp_qdisc_get_red_stats(). This new function will be useful for other leaf Qdiscs as well. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index ef63b8cbbc94..a0f07e951607 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -273,6 +273,24 @@ mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp, stats_base->tx_packets += tx_packets; } +static void +mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 backlog = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog); + mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, + tx_bytes, tx_packets, drops, backlog, + stats_ptr); +} + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, @@ -452,24 +470,15 @@ mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; - u64 tx_packets = 0; - u64 tx_bytes = 0; - u64 backlog = 0; u64 overlimits; - u64 drops = 0; xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats_base = &mlxsw_sp_qdisc->stats_base; - mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, - &tx_bytes, &tx_packets, - &drops, &backlog); + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr); overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - stats_base->overlimits; - mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc, - tx_bytes, tx_packets, drops, backlog, - stats_ptr); stats_ptr->qstats->overlimits += overlimits; stats_base->overlimits += overlimits; From be1d5a8a7728cc734fc31b837796e7addc4ba832 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:09 +0200 Subject: [PATCH 05/14] mlxsw: spectrum_qdisc: Extract a common leaf unoffload function When the RED Qdisc is unoffloaded, it needs to reduce the reported backlog by the amount that is in the HW, so that only the SW backlog is contained in the counter. The same thing will need to be done by TBF, and likely any other leaf Qdisc as well. Extract a helper mlxsw_sp_qdisc_leaf_unoffload() and call it from mlxsw_sp_qdisc_red_unoffload(). Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index a0f07e951607..57b014a95bc8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -420,18 +420,27 @@ mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, max, prob, p->is_ecn); } +static void +mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct gnet_stats_queue *qstats) +{ + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + qstats->backlog -= backlog; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + static void mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params) { struct tc_red_qopt_offload_params *p = params; - u64 backlog; - backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_qdisc->stats_base.backlog); - p->qstats->backlog -= backlog; - mlxsw_sp_qdisc->stats_base.backlog = 0; + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); } static int From 23effa2479bae2fbea69493afaab9914ee3862ca Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:10 +0200 Subject: [PATCH 06/14] mlxsw: reg: Add max_shaper_bs to QoS ETS Element Configuration The QEEC register configures scheduling elements. One of the bits of configuration is the burst size to use for the shaper installed on the element. Add the necessary fields to support this configuration. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 0b80e75e87c3..afd712d8fd46 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3602,6 +3602,21 @@ MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); */ MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); +/* reg_qeec_max_shaper_bs + * Max shaper burst size + * Burst size is 2^max_shaper_bs * 512 bits + * For Spectrum-1: Range is: 5..25 + * For Spectrum-2: Range is: 11..25 + * Reserved when ptps = 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6); + +#define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 +#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 + static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index) From 92afbfedb77d59a4077d292b88a316f3de706344 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:11 +0200 Subject: [PATCH 07/14] mlxsw: reg: Increase MLXSW_REG_QEEC_MAS_DIS As the port speeds grow, the current value of "unlimited shaper", 200000000Kbps, might become lower than the actually supported speeds. Bump it to the maximum value that fits in the corresponding QEEC field, which is about 2.1Tbps. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index afd712d8fd46..dd6685156396 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3563,8 +3563,8 @@ MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); */ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); -/* A large max rate will disable the max shaper. */ -#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ +/* The largest max shaper value possible to disable the shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS ((1u << 31) - 1) /* Kbps */ /* reg_qeec_max_shaper_rate * Max shaper information rate. From 47259544e0fc0659f74f6ca6680745658969a803 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:12 +0200 Subject: [PATCH 08/14] mlxsw: spectrum: Add lowest_shaper_bs to struct mlxsw_sp Lower limit of burst size configuration is dependent on system type. Add a datum to track the value. Initialize as appropriate in mlxsw_spX_init(). Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 +++ drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 9eb3ac7669f7..72d24595df7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5173,6 +5173,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->span_ops = &mlxsw_sp1_span_ops; mlxsw_sp->listeners = mlxsw_sp1_listener; mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } @@ -5197,6 +5198,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } @@ -5219,6 +5221,7 @@ static int mlxsw_sp3_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; mlxsw_sp->span_ops = &mlxsw_sp2_span_ops; + mlxsw_sp->lowest_shaper_bs = MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info, extack); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5f3b74360dc8..bbc23939964e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -189,6 +189,7 @@ struct mlxsw_sp { const struct mlxsw_sp_span_ops *span_ops; const struct mlxsw_listener *listeners; size_t listeners_count; + u32 lowest_shaper_bs; }; static inline struct mlxsw_sp_upper * From dbacf8ba5860e79d5191b0868bffa9e351d205e4 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:13 +0200 Subject: [PATCH 09/14] mlxsw: spectrum: Configure shaper rate and burst size together In order to allow configuration of burst size together with shaper rate, extend mlxsw_sp_port_ets_maxrate_set() with a burst_size argument. Convert call sites to pass 0 (for default). Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 11 ++++++----- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 5 +++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 72d24595df7a..021664c62bba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3577,7 +3577,7 @@ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate) + u8 next_index, u32 maxrate, u8 burst_size) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char qeec_pl[MLXSW_REG_QEEC_LEN]; @@ -3586,6 +3586,7 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, next_index); mlxsw_reg_qeec_mase_set(qeec_pl, true); mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + mlxsw_reg_qeec_max_shaper_bs_set(qeec_pl, burst_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); } @@ -3654,14 +3655,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) */ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_PORT, 0, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } @@ -3669,14 +3670,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_TC, i, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_TC, i + 8, i, - MLXSW_REG_QEEC_MAS_DIS); + MLXSW_REG_QEEC_MAS_DIS, 0); if (err) return err; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index bbc23939964e..a2393c02a5e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -488,7 +488,7 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, struct ieee_pfc *my_pfc); int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, - u8 next_index, u32 maxrate); + u8 next_index, u32 maxrate, u8 burst_size); enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 stp_state); int mlxsw_sp_port_vid_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid, u8 state); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index db66f2b56a6d..49a72a8f1f57 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -526,7 +526,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, i, 0, - maxrate->tc_maxrate[i]); + maxrate->tc_maxrate[i], 0); if (err) { netdev_err(dev, "Failed to set maxrate for TC %d\n", i); goto err_port_ets_maxrate_set; @@ -541,7 +541,8 @@ err_port_ets_maxrate_set: for (i--; i >= 0; i--) mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HR_SUBGROUP, - i, 0, my_maxrate->tc_maxrate[i]); + i, 0, + my_maxrate->tc_maxrate[i], 0); return err; } From a44f58c41bfbb921c1c4586bed9ad26aa2c6208a Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:14 +0200 Subject: [PATCH 10/14] mlxsw: spectrum_qdisc: Support offloading of TBF Qdisc React to the TC messages that were introduced in a preceding patch and configure egress maximum shaper as appropriate. TBF can be used as a root qdisc or under one of PRIO or strict ETS bands. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Acked-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 2 + .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 + .../ethernet/mellanox/mlxsw/spectrum_qdisc.c | 199 ++++++++++++++++++ 3 files changed, 203 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 021664c62bba..7358b5bc7eb6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1796,6 +1796,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_ETS: return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_TBF: + return mlxsw_sp_setup_tc_tbf(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a2393c02a5e5..a0f1f9dceec5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -862,6 +862,8 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_ets_qopt_offload *p); +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p); /* spectrum_fid.c */ bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 57b014a95bc8..79a2801d59f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -19,6 +19,7 @@ enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_RED, MLXSW_SP_QDISC_PRIO, MLXSW_SP_QDISC_ETS, + MLXSW_SP_QDISC_TBF, }; struct mlxsw_sp_qdisc_ops { @@ -540,6 +541,204 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +static void +mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + u64 backlog_cells = 0; + u64 tx_packets = 0; + u64 tx_bytes = 0; + u64 drops = 0; + + mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &tx_bytes, &tx_packets, + &drops, &backlog_cells); + + mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets; + mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes; + mlxsw_sp_qdisc->stats_base.drops = drops; + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static int +mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc *root_qdisc = mlxsw_sp_port->root_qdisc; + + if (root_qdisc != mlxsw_sp_qdisc) + root_qdisc->stats_base.backlog -= + mlxsw_sp_qdisc->stats_base.backlog; + + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + MLXSW_REG_QEEC_MAS_DIS, 0); +} + +static int +mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port, + u32 max_size, u8 *p_burst_size) +{ + /* TBF burst size is configured in bytes. The ASIC burst size value is + * ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units. + */ + u32 bs512 = max_size / 64; + u8 bs = fls(bs512); + + if (!bs) + return -EINVAL; + --bs; + + /* Demand a power of two. */ + if ((1 << bs) != bs512) + return -EINVAL; + + if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs || + bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS) + return -EINVAL; + + *p_burst_size = bs; + return 0; +} + +static u32 +mlxsw_sp_qdisc_tbf_max_size(u8 bs) +{ + return (1U << bs) * 64; +} + +static u64 +mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p) +{ + /* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in + * Kbits/s. + */ + return p->rate.rate_bytes_ps / 1000 * 8; +} + +static int +mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) { + dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev, + "spectrum: TBF: rate of %lluKbps must be below %u\n", + rate_kbps, MLXSW_REG_QEEC_MAS_DIS); + return -EINVAL; + } + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (err) { + u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS; + + dev_err(mlxsw_sp->bus_info->dev, + "spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u", + p->max_size, + mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs), + mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs)); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); + u8 burst_size; + int err; + + err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size); + if (WARN_ON_ONCE(err)) + /* check_params above was supposed to reject this value. */ + return -EINVAL; + + /* Configure subgroup shaper, so that both UC and MC traffic is subject + * to shaping. That is unlike RED, however UC queue lengths are going to + * be different than MC ones due to different pool and quota + * configurations, so the configuration is not applicable. For shaper on + * the other hand, subjecting the overall stream to the configured + * shaper makes sense. Also note that that is what we do for + * ieee_setmaxrate(). + */ + return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HR_SUBGROUP, + mlxsw_sp_qdisc->tclass_num, 0, + rate_kbps, burst_size); +} + +static void +mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_tbf_qopt_offload_replace_params *p = params; + + mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats); +} + +static int +mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + stats_ptr); + return 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = { + .type = MLXSW_SP_QDISC_TBF, + .check_params = mlxsw_sp_qdisc_tbf_check_params, + .replace = mlxsw_sp_qdisc_tbf_replace, + .unoffload = mlxsw_sp_qdisc_tbf_unoffload, + .destroy = mlxsw_sp_qdisc_tbf_destroy, + .get_stats = mlxsw_sp_qdisc_get_tbf_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats, +}; + +int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_tbf_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false); + if (!mlxsw_sp_qdisc) + return -EOPNOTSUPP; + + if (p->command == TC_TBF_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_tbf, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_TBF)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_TBF_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_TBF_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + default: + return -EOPNOTSUPP; + } +} + static int __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port) { From adc6c7ec11293117ec2763ccbe17d58349e011cf Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:15 +0200 Subject: [PATCH 11/14] selftests: Move two functions from mlxsw's qos_lib to lib The function humanize() is used for converting value in bits/s to a human-friendly approximate value in Kbps, Mbps or Gbps. There is nothing hardware-specific in that, so move the function to lib.sh. Similarly for the rate() function, which just does a bit of math to calculate a rate, given two counter values and a time interval. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../selftests/drivers/net/mlxsw/qos_lib.sh | 24 ------------------- tools/testing/selftests/net/forwarding/lib.sh | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index a5937069ac16..faa51012cdac 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -1,29 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -humanize() -{ - local speed=$1; shift - - for unit in bps Kbps Mbps Gbps; do - if (($(echo "$speed < 1024" | bc))); then - break - fi - - speed=$(echo "scale=1; $speed / 1024" | bc) - done - - echo "$speed${unit}" -} - -rate() -{ - local t0=$1; shift - local t1=$1; shift - local interval=$1; shift - - echo $((8 * (t1 - t0) / interval)) -} - check_rate() { local rate=$1; shift diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8dc5fac98cbc..10cdfc0adca8 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -588,6 +588,30 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + mac_get() { local if_name=$1 From 4121d9479b24c20ff7d3e7b62a7cf330fae7b2af Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:16 +0200 Subject: [PATCH 12/14] selftests: forwarding: lib: Add helpers for busywaiting The function busywait() is handy as a safety-latched variant of a while loop. Many selftests deal specifically with counter values, and busywaiting on them is likely to be rather common (it is not quite common now, but busywait() has not been around for very long). To facilitate expressing simply what is tested, introduce two helpers: - until_counter_is(), which can be used as a predicate passed to busywait(), which holds when expression, which is itself passed as an argument to until_counter_is(), reaches a desired value. - busywait_for_counter(), which is useful for waiting until a given counter changes "by" (as opposed to "to") a certain amount. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/forwarding/lib.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 10cdfc0adca8..096340f064db 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -248,6 +248,24 @@ busywait() done } +until_counter_is() +{ + local value=$1; shift + local current=$("$@") + + echo $((current)) + ((current >= value)) +} + +busywait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + busywait "$timeout" until_counter_is $((base + delta)) "$@" +} + setup_wait_dev() { local dev=$1; shift From c143139b7fd36cb66afb4423ac14a1efd8a45609 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:17 +0200 Subject: [PATCH 13/14] selftests: forwarding: lib: Allow reading TC rule byte counters The function tc_rule_stats_get() fetches a packet counter of a given TC rule. Extend it to support byte counters as well by adding an optional argument with selector. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- tools/testing/selftests/net/forwarding/lib.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 096340f064db..2f5da414aaa7 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -593,9 +593,10 @@ tc_rule_stats_get() local dev=$1; shift local pref=$1; shift local dir=$1; shift + local selector=${1:-.packets}; shift tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ - | jq '.[1].options.actions[].stats.packets' + | jq ".[1].options.actions[].stats$selector" } ethtool_stats_get() From e814c58d454d952778fbb70bd0e71564f8c49f28 Mon Sep 17 00:00:00 2001 From: Petr Machata <petrm@mellanox.com> Date: Fri, 24 Jan 2020 15:23:18 +0200 Subject: [PATCH 14/14] selftests: mlxsw: Add a TBF selftest Add a test that runs traffic across a port throttled with TBF. The test checks that the observed throughput is within +-5% from the installed shaper. To allow checking both the software datapath and the offloaded one, make the test suitable for inclusion from driver-specific wrapper. Introduce such wrappers for mlxsw. Signed-off-by: Petr Machata <petrm@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../drivers/net/mlxsw/sch_tbf_ets.sh | 9 + .../drivers/net/mlxsw/sch_tbf_prio.sh | 9 + .../drivers/net/mlxsw/sch_tbf_root.sh | 9 + .../selftests/net/forwarding/sch_tbf_core.sh | 233 ++++++++++++++++++ .../selftests/net/forwarding/sch_tbf_ets.sh | 6 + .../net/forwarding/sch_tbf_etsprio.sh | 39 +++ .../selftests/net/forwarding/sch_tbf_prio.sh | 6 + .../selftests/net/forwarding/sch_tbf_root.sh | 33 +++ 8 files changed, 344 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh create mode 100755 tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh create mode 100644 tools/testing/selftests/net/forwarding/sch_tbf_core.sh create mode 100755 tools/testing/selftests/net/forwarding/sch_tbf_ets.sh create mode 100644 tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh create mode 100755 tools/testing/selftests/net/forwarding/sch_tbf_prio.sh create mode 100755 tools/testing/selftests/net/forwarding/sch_tbf_root.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh new file mode 100755 index 000000000000..c6ce0b448bf3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh new file mode 100755 index 000000000000..8d245f331619 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_prio.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh new file mode 100755 index 000000000000..013886061f15 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_root.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh new file mode 100644 index 000000000000..d1f26cb7cd73 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -0,0 +1,233 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a stream of traffic from H1 through a switch, to H2. On the +# egress port from the switch ($swp2), a shaper is installed. The test verifies +# that the rates on the port match the configured shaper. +# +# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or +# ETS, with two different configurations. Traffic is prioritized using 802.1p. +# +# +-------------------------------------------+ +# | H1 | +# | + $h1.10 $h1.11 + | +# | | 192.0.2.1/28 192.0.2.17/28 | | +# | | | | +# | \______________ _____________/ | +# | \ / | +# | + $h1 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | SW + $swp1 | +# | _______________/ \_______________ | +# | / \ | +# | +-|--------------+ +--------------|-+ | +# | | + $swp1.10 | | $swp1.11 + | | +# | | | | | | +# | | BR10 | | BR11 | | +# | | | | | | +# | | + $swp2.10 | | $swp2.11 + | | +# | +-|--------------+ +--------------|-+ | +# | \_______________ ______________/ | +# | \ / | +# | + $swp2 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | H2 + $h2 | +# | ______________/ \______________ | +# | / \ | +# | | | | +# | + $h2.10 $h2.11 + | +# | 192.0.2.2/28 192.0.2.18/28 | +# +-------------------------------------------+ + +NUM_NETIFS=4 +CHECK_TC="yes" +source $lib_dir/lib.sh + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + simple_if_init $dev + mtu_set $dev 10000 + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + ip link set dev $dev.11 type vlan egress 0:1 +} + +host_destroy() +{ + local dev=$1; shift + + vlan_destroy $dev 11 + vlan_destroy $dev 10 + mtu_restore $dev + simple_if_fini $dev +} + +h1_create() +{ + host_create $h1 1 +} + +h1_destroy() +{ + host_destroy $h1 +} + +h2_create() +{ + host_create $h2 2 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ + flower $TCFLAGS vlan_id 10 action pass + tc filter add dev $h2 ingress pref 1011 prot 802.1q \ + flower $TCFLAGS vlan_id 11 action pass +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + host_destroy $h2 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br10 type bridge + ip link add dev br11 type bridge + + for intf in $swp1 $swp2; do + ip link set dev $intf up + mtu_set $intf 10000 + + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br$vlan + ip link set dev $intf.$vlan up + done + done + + for vlan in 10 11; do + ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1 + done + + ip link set dev br10 up + ip link set dev br11 up +} + +switch_destroy() +{ + local intf + local vlan + + # A test may have been interrupted mid-run, with Qdisc installed. Delete + # it here. + tc qdisc del dev $swp2 root 2>/dev/null + + ip link set dev br11 down + ip link set dev br10 down + + for intf in $swp2 $swp1; do + for vlan in 11 10; do + ip link set dev $intf.$vlan down + ip link set dev $intf.$vlan nomaster + vlan_destroy $intf $vlan + done + + mtu_restore $intf + ip link set dev $intf down + done + + ip link del dev br11 + ip link del dev br10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h2_mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 2 10) " vlan 10" + ping_test $h1.11 $(ipaddr 2 11) " vlan 11" +} + +tbf_get_counter() +{ + local vlan=$1; shift + + tc_rule_stats_get $h2 10$vlan ingress .bytes +} + +do_tbf_test() +{ + local vlan=$1; shift + local mbit=$1; shift + + start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + sleep 5 # Wait for the burst to dwindle + + local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) + sleep 10 + local t3=$(tbf_get_counter $vlan) + stop_traffic + + RET=0 + + # Note: TBF uses 10^6 Mbits, not 2^20 ones. + local er=$((mbit * 1000 * 1000)) + local nr=$(rate $t2 $t3 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-5 <= nr_pct && nr_pct <= 5)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." + + log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh new file mode 100755 index 000000000000..84fb6cab88e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="ets strict" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh new file mode 100644 index 000000000000..8bd85da1905a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \ + rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \ + rate 800Mbit burst $bs limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 800 $bs +} + +tbf_test() +{ + # This test is used for both ETS and PRIO. Even though we only need two + # bands, PRIO demands a minimum of three. + tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh new file mode 100755 index 000000000000..9c8cb1cb9ba4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="prio bands" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh new file mode 100755 index 000000000000..72aa21ba88c7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 root handle 108: tbf \ + rate 400Mbit burst $bs limit 1M + do_tbf_test 10 400 $bs +} + +tbf_test() +{ + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS