diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index 28427f0758c7..31c886edc791 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -42,7 +42,7 @@ struct mlxsw_item { unsigned short offset; /* bytes in container */ - unsigned short step; /* step in bytes for indexed items */ + short step; /* step in bytes for indexed items */ unsigned short in_step_offset; /* offset within one step */ unsigned char shift; /* shift in bits */ unsigned char element_size; /* size of element in bit array */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6c4e08b8058a..0e08be41c8e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4827,6 +4827,42 @@ static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, mlxsw_reg_ratr_counter_set_type_set(payload, set_type); } +/* RDPM - Router DSCP to Priority Mapping + * -------------------------------------- + * Controls the mapping from DSCP field to switch priority on routed packets + */ +#define MLXSW_REG_RDPM_ID 0x8009 +#define MLXSW_REG_RDPM_BASE_LEN 0x00 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64 +#define MLXSW_REG_RDPM_LEN 0x40 +#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \ + MLXSW_REG_RDPM_LEN - \ + MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN) + +MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN); + +/* reg_dscp_entry_e + * Enable update of the specific entry + * Access: Index + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +/* reg_dscp_entry_prio + * Switch Priority + * Access: RW + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index, + u8 prio) +{ + mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1); + mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -7640,6 +7676,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), + MLXSW_REG(rdpm), MLXSW_REG(ricnt), MLXSW_REG(rrcr), MLXSW_REG(ralta), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 54c7d9202e81..f78bfe394966 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1830,6 +1830,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_RED: return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_PRIO: + return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b6f475e83474..16f8fbda0891 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -565,6 +565,8 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 971f689dd833..e11a0abfc663 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -41,9 +41,12 @@ #include "spectrum.h" #include "reg.h" +#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) + enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, + MLXSW_SP_QDISC_PRIO, }; struct mlxsw_sp_qdisc_ops { @@ -63,6 +66,11 @@ struct mlxsw_sp_qdisc_ops { void *xstats_ptr); void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + /* unoffload - to be used for a qdisc that stops being offloaded without + * being destroyed. + */ + void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); }; struct mlxsw_sp_qdisc { @@ -76,6 +84,7 @@ struct mlxsw_sp_qdisc { u64 tx_packets; u64 drops; u64 overlimits; + u64 backlog; } stats_base; struct mlxsw_sp_qdisc_ops *ops; @@ -141,6 +150,9 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, err_bad_param: err_config: + if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) + ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); return err; } @@ -403,6 +415,165 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +static int +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + MLXSW_SP_PORT_DEFAULT_TCLASS); + + return 0; +} + +static int +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + + if (p->bands > IEEE_8021QAZ_MAX_TCS) + return -EOPNOTSUPP; + + return 0; +} + +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + int tclass, i; + int err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); + if (err) + return err; + } + + return 0; +} + +void +mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + p->qstats->backlog -= backlog; +} + +static int +mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + u64 tx_bytes, tx_packets, drops = 0, backlog = 0; + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + int i; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + tx_bytes = stats->tx_bytes - stats_base->tx_bytes; + tx_packets = stats->tx_packets - stats_base->tx_packets; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + drops += xstats->tail_drop[i]; + backlog += xstats->backlog[i]; + } + drops = drops - stats_base->drops; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += + mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + backlog) - + mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + stats_base->backlog); + stats_base->backlog = backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; + return 0; +} + +static void +mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + int i; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + stats_base->tx_packets = stats->tx_packets; + stats_base->tx_bytes = stats->tx_bytes; + + stats_base->drops = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + stats_base->drops += xstats->tail_drop[i]; + + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { + .type = MLXSW_SP_QDISC_PRIO, + .check_params = mlxsw_sp_qdisc_prio_check_params, + .replace = mlxsw_sp_qdisc_prio_replace, + .unoffload = mlxsw_sp_qdisc_prio_unoffload, + .destroy = mlxsw_sp_qdisc_prio_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + if (p->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; + if (p->command == TC_PRIO_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_prio, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_PRIO)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_PRIO_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_PRIO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7a136256b8f7..01ff5ba6796e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7011,6 +7011,24 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) } #endif +static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) +{ + char rdpm_pl[MLXSW_REG_RDPM_LEN]; + unsigned int i; + + MLXSW_REG_ZERO(rdpm, rdpm_pl); + + /* HW is determining switch priority based on DSCP-bits, but the + * kernel is still doing that based on the ToS. Since there's a + * mismatch in bits we need to make sure to translate the right + * value ToS would observe, skipping the 2 least-significant ECN bits. + */ + for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++) + mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl); +} + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -7023,6 +7041,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); + mlxsw_reg_rgcr_usp_set(rgcr_pl, true); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) return err; @@ -7098,6 +7117,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_mp_hash_init; + err = mlxsw_sp_dscp_init(mlxsw_sp); + if (err) + goto err_dscp_init; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -7107,6 +7130,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: +err_dscp_init: err_mp_hash_init: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef7b348e8498..6d95477b962c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -780,6 +780,7 @@ enum tc_setup_type { TC_SETUP_BLOCK, TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, + TC_SETUP_QDISC_PRIO, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0d1343cba84c..9c341f003091 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -761,4 +761,29 @@ struct tc_red_qopt_offload { }; }; +enum tc_prio_command { + TC_PRIO_REPLACE, + TC_PRIO_DESTROY, + TC_PRIO_STATS, +}; + +struct tc_prio_qopt_offload_params { + int bands; + u8 priomap[TC_PRIO_MAX + 1]; + /* In case that a prio qdisc is offloaded and now is changed to a + * non-offloadedable config, it needs to update the backlog & qlen + * values to negate the HW backlog & qlen values (and only them). + */ + struct gnet_stats_queue *qstats; +}; + +struct tc_prio_qopt_offload { + enum tc_prio_command command; + u32 handle; + u32 parent; + union { + struct tc_prio_qopt_offload_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; #endif diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fe1510eb111f..a398502899a9 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } +static int prio_offload(struct Qdisc *sch, bool enable) +{ + struct prio_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_PRIO_REPLACE; + opt.replace_params.bands = q->bands; + memcpy(&opt.replace_params.priomap, q->prio2band, + TC_PRIO_MAX + 1); + opt.replace_params.qstats = &sch->qstats; + } else { + opt.command = TC_PRIO_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt); +} + static void prio_destroy(struct Qdisc *sch) { @@ -149,6 +174,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + prio_offload(sch, false); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -204,6 +230,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); + prio_offload(sch, true); return 0; } @@ -223,15 +250,47 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt, return prio_tune(sch, opt, extack); } +static int prio_dump_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload hw_stats = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_PRIO_STATS, + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }; + int err; + + sch->flags &= ~TCQ_F_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + sch->flags |= TCQ_F_OFFLOADED; + + return err; +} + static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; + int err; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); + err = prio_dump_offload(sch); + if (err) + goto nla_put_failure; + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure;