Merge branch 'mlxsw-Further-MC-awareness-configuration'

Ido Schimmel says:

====================
mlxsw: Further MC-awareness configuration

Petr says:

Due to an issue in Spectrum chips, when unicast traffic shares the same
queue as BUM traffic, and there is congestion, the BUM traffic is
admitted to the queue anyway, thus pushing out all UC traffic. In order
to give unicast traffic precedence over BUM traffic, multicast-aware
mode is now configured on all ports. Under MC-aware mode, egress TCs
8..15 are used for BUM traffic, which has its own dedicated pool.

This patch set improves the way that the MC pool and the higher-order
TCs are integrated into the system.

In patch #1, the shaper at the higher TCs is explicitly configured to the
value that it already has by default, so that this setting is visible in
the code instead of being implicit.

The 8 following patches gradually extend the devlink handling in mlxsw
to support the extra TCs and the new MC pool.

Patch #2 changes the way that pools are indexed in mlxsw. Instead of
using a (FW index, direction) tuple to identify the pool and the
associated cache, mlxsw now uses the devlink index. This change is
necessary because the new pool 15 is not contiguous with the
currently-used pools 0..3, and because it's only relevant on egress.
Using the devlink index relaxes the requirement for symmetry and
adjacency imposed by using FW indexing.

In patch #3, the assumption that the number of ingress TCs matches that
of egress TCs is relaxed so that egress TCs 8..15 can be exposed.

In patches #4, #5 and #6, support for infinite quotas is introduced.
Infinite quotas are reported as taking all the memory in the system, but
actually use a mechanism where the infinity is configured explicitly.

In patches #7 and #8, support for configuring static pool sizes is
introduced. Statically-sized pools have been supported for a while now,
but during initialization, all pools have dynamic size. The patches
allow there to be a mix of by-default static and dynamic pools.

In patches #9 and #10, pool 15 and the per-priority MC quotas,
respectively, are explicitly configured to be in sync with the current
recommendation for handling BUM traffic in Spectrum chips.

In the following 3 patches, an mlxsw-specific selftest is added to test
the MC-awareness configuration.

First in patches #11 and #12, lib.sh is extended with functions to
collect ethtool stats, and to manage port MTU.

Then in patch #13 the selftest itself is added.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2018-09-20 07:46:02 -07:00
commit cab9572a09
5 changed files with 656 additions and 231 deletions

View File

@ -8336,8 +8336,15 @@ MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2);
*/
MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4);
/* reg_sbpr_infi_size
* Size is infinite.
* Access: RW
*/
MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1);
/* reg_sbpr_size
* Pool size in buffer cells.
* Reserved when infi_size = 1.
* Access: RW
*/
MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24);
@ -8355,13 +8362,15 @@ MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4);
static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool,
enum mlxsw_reg_sbxx_dir dir,
enum mlxsw_reg_sbpr_mode mode, u32 size)
enum mlxsw_reg_sbpr_mode mode, u32 size,
bool infi_size)
{
MLXSW_REG_ZERO(sbpr, payload);
mlxsw_reg_sbpr_pool_set(payload, pool);
mlxsw_reg_sbpr_dir_set(payload, dir);
mlxsw_reg_sbpr_mode_set(payload, mode);
mlxsw_reg_sbpr_size_set(payload, size);
mlxsw_reg_sbpr_infi_size_set(payload, infi_size);
}
/* SBCM - Shared Buffer Class Management Register
@ -8409,6 +8418,12 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14
/* reg_sbcm_infi_max
* Max buffer is infinite.
* Access: RW
*/
MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1);
/* reg_sbcm_max_buff
* When the pool associated to the port-pg/tclass is configured to
* static, Maximum buffer size for the limiter configured in cells.
@ -8418,6 +8433,7 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
* 0: 0
* i: (1/128)*2^(i-1), for i=1..14
* 0xFF: Infinity
* Reserved when infi_max = 1.
* Access: RW
*/
MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24);
@ -8430,7 +8446,8 @@ MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4);
static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
enum mlxsw_reg_sbxx_dir dir,
u32 min_buff, u32 max_buff, u8 pool)
u32 min_buff, u32 max_buff,
bool infi_max, u8 pool)
{
MLXSW_REG_ZERO(sbcm, payload);
mlxsw_reg_sbcm_local_port_set(payload, local_port);
@ -8438,6 +8455,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
mlxsw_reg_sbcm_dir_set(payload, dir);
mlxsw_reg_sbcm_min_buff_set(payload, min_buff);
mlxsw_reg_sbcm_max_buff_set(payload, max_buff);
mlxsw_reg_sbcm_infi_max_set(payload, infi_max);
mlxsw_reg_sbcm_pool_set(payload, pool);
}

View File

@ -2804,6 +2804,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
MLXSW_REG_QEEC_MAS_DIS);
if (err)
return err;
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HIERARCY_TC,
i + 8, i,
MLXSW_REG_QEEC_MAS_DIS);
if (err)
return err;
}
/* Map all priorities to traffic class 0. */

View File

@ -25,28 +25,52 @@ struct mlxsw_cp_sb_occ {
struct mlxsw_sp_sb_cm {
u32 min_buff;
u32 max_buff;
u8 pool;
u16 pool_index;
struct mlxsw_cp_sb_occ occ;
};
#define MLXSW_SP_SB_INFI -1U
struct mlxsw_sp_sb_pm {
u32 min_buff;
u32 max_buff;
struct mlxsw_cp_sb_occ occ;
};
#define MLXSW_SP_SB_POOL_COUNT 4
#define MLXSW_SP_SB_TC_COUNT 8
/* Descriptor of a single shared buffer pool: the direction the pool serves
 * and the pool number that is handed to the register pack helpers.
 */
struct mlxsw_sp_sb_pool_des {
enum mlxsw_reg_sbxx_dir dir;
u8 pool;
};
/* Pool descriptors. The position of an entry in this array is the pool_index
 * used by the pr/pm/cm accessors in this file.
 *
 * Order ingress pools before egress pools: mlxsw_sp_pool_count() treats
 * every entry preceding the first egress entry as an ingress pool.
 */
static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = {
{MLXSW_REG_SBXX_DIR_INGRESS, 0},
{MLXSW_REG_SBXX_DIR_INGRESS, 1},
{MLXSW_REG_SBXX_DIR_INGRESS, 2},
{MLXSW_REG_SBXX_DIR_INGRESS, 3},
{MLXSW_REG_SBXX_DIR_EGRESS, 0},
{MLXSW_REG_SBXX_DIR_EGRESS, 1},
{MLXSW_REG_SBXX_DIR_EGRESS, 2},
{MLXSW_REG_SBXX_DIR_EGRESS, 3},
/* Pool 15 exists on egress only and is not contiguous with pools 0..3. */
{MLXSW_REG_SBXX_DIR_EGRESS, 15},
};
#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess)
#define MLXSW_SP_SB_ING_TC_COUNT 8
#define MLXSW_SP_SB_EG_TC_COUNT 16
struct mlxsw_sp_sb_port {
struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT];
struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT];
struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN];
};
struct mlxsw_sp_sb {
struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN];
struct mlxsw_sp_sb_port *ports;
u32 cell_size;
u64 sb_size;
};
u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells)
@ -60,95 +84,122 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes)
}
static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp,
u8 pool,
enum mlxsw_reg_sbxx_dir dir)
u16 pool_index)
{
return &mlxsw_sp->sb->prs[dir][pool];
return &mlxsw_sp->sb->prs[pool_index];
}
/* Report whether a CM cache entry exists for buffer @pg_buff in direction
 * @dir. Ingress is bounded by MLXSW_SP_SB_ING_TC_COUNT, any other direction
 * by MLXSW_SP_SB_EG_TC_COUNT.
 */
static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir)
{
	unsigned int tc_count = MLXSW_SP_SB_EG_TC_COUNT;

	if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
		tc_count = MLXSW_SP_SB_ING_TC_COUNT;

	return pg_buff < tc_count;
}
static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp,
u8 local_port, u8 pg_buff,
enum mlxsw_reg_sbxx_dir dir)
{
return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff];
struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port];
WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir));
if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
return &sb_port->ing_cms[pg_buff];
else
return &sb_port->eg_cms[pg_buff];
}
static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp,
u8 local_port, u8 pool,
enum mlxsw_reg_sbxx_dir dir)
u8 local_port, u16 pool_index)
{
return &mlxsw_sp->sb->ports[local_port].pms[dir][pool];
return &mlxsw_sp->sb->ports[local_port].pms[pool_index];
}
static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool,
enum mlxsw_reg_sbxx_dir dir,
enum mlxsw_reg_sbpr_mode mode, u32 size)
static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
enum mlxsw_reg_sbpr_mode mode,
u32 size, bool infi_size)
{
const struct mlxsw_sp_sb_pool_des *des =
&mlxsw_sp_sb_pool_dess[pool_index];
char sbpr_pl[MLXSW_REG_SBPR_LEN];
struct mlxsw_sp_sb_pr *pr;
int err;
mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size);
mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode,
size, infi_size);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl);
if (err)
return err;
pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
if (infi_size)
size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size);
pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
pr->mode = mode;
pr->size = size;
return 0;
}
static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
u8 pg_buff, enum mlxsw_reg_sbxx_dir dir,
u32 min_buff, u32 max_buff, u8 pool)
u8 pg_buff, u32 min_buff, u32 max_buff,
bool infi_max, u16 pool_index)
{
const struct mlxsw_sp_sb_pool_des *des =
&mlxsw_sp_sb_pool_dess[pool_index];
char sbcm_pl[MLXSW_REG_SBCM_LEN];
struct mlxsw_sp_sb_cm *cm;
int err;
mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir,
min_buff, max_buff, pool);
mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir,
min_buff, max_buff, infi_max, des->pool);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl);
if (err)
return err;
if (pg_buff < MLXSW_SP_SB_TC_COUNT) {
struct mlxsw_sp_sb_cm *cm;
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir);
if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) {
if (infi_max)
max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
mlxsw_sp->sb->sb_size);
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff,
des->dir);
cm->min_buff = min_buff;
cm->max_buff = max_buff;
cm->pool = pool;
cm->pool_index = pool_index;
}
return 0;
}
static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
u8 pool, enum mlxsw_reg_sbxx_dir dir,
u32 min_buff, u32 max_buff)
u16 pool_index, u32 min_buff, u32 max_buff)
{
const struct mlxsw_sp_sb_pool_des *des =
&mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
struct mlxsw_sp_sb_pm *pm;
int err;
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false,
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false,
min_buff, max_buff);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl);
if (err)
return err;
pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
pm->min_buff = min_buff;
pm->max_buff = max_buff;
return 0;
}
static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port,
u8 pool, enum mlxsw_reg_sbxx_dir dir,
struct list_head *bulk_list)
u16 pool_index, struct list_head *bulk_list)
{
const struct mlxsw_sp_sb_pool_des *des =
&mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0);
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
true, 0, 0);
return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
bulk_list, NULL, 0);
}
@ -163,14 +214,16 @@ static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core,
}
static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port,
u8 pool, enum mlxsw_reg_sbxx_dir dir,
struct list_head *bulk_list)
u16 pool_index, struct list_head *bulk_list)
{
const struct mlxsw_sp_sb_pool_des *des =
&mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
struct mlxsw_sp_sb_pm *pm;
pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0);
pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
false, 0, 0);
return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
bulk_list,
mlxsw_sp_sb_pm_occ_query_cb,
@ -254,63 +307,54 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
.size = _size, \
}
static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = {
static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = {
/* Ingress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP_SB_PR_INGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP_SB_PR_INGRESS_MNG_SIZE),
};
#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress)
static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = {
/* Egress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI),
};
#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress)
#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs)
static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
enum mlxsw_reg_sbxx_dir dir,
const struct mlxsw_sp_sb_pr *prs,
size_t prs_len)
static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
const struct mlxsw_sp_sb_pr *prs,
size_t prs_len)
{
int i;
int err;
for (i = 0; i < prs_len; i++) {
u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size);
u32 size = prs[i].size;
u32 size_cells;
err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size);
if (size == MLXSW_SP_SB_INFI) {
err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
0, true);
} else {
size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
size_cells, false);
}
if (err)
return err;
}
return 0;
}
static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp)
{
int err;
err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS,
mlxsw_sp_sb_prs_ingress,
MLXSW_SP_SB_PRS_INGRESS_LEN);
if (err)
return err;
return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS,
mlxsw_sp_sb_prs_egress,
MLXSW_SP_SB_PRS_EGRESS_LEN);
}
#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \
{ \
.min_buff = _min_buff, \
.max_buff = _max_buff, \
.pool = _pool, \
.pool_index = _pool, \
}
static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
@ -329,38 +373,38 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress)
static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(1500, 9, 0),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(0, 140000, 15),
MLXSW_SP_SB_CM(1, 0xff, 0),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(1500, 9, 4),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
MLXSW_SP_SB_CM(1, 0xff, 4),
};
#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress)
#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0)
#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4)
static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
@ -390,6 +434,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \
ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms)
/* Return true when the cached mode of the pool identified by @pool_index is
 * MLXSW_REG_SBPR_MODE_STATIC.
 */
static bool
mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index)
{
	return mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index)->mode ==
	       MLXSW_REG_SBPR_MODE_STATIC;
}
static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
enum mlxsw_reg_sbxx_dir dir,
const struct mlxsw_sp_sb_cm *cms,
@ -401,16 +453,29 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
for (i = 0; i < cms_len; i++) {
const struct mlxsw_sp_sb_cm *cm;
u32 min_buff;
u32 max_buff;
if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS)
continue; /* PG number 8 does not exist, skip it */
cm = &cms[i];
/* All pools are initialized using dynamic thresholds,
* therefore 'max_buff' isn't specified in cells.
*/
if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir))
continue;
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff);
err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir,
min_buff, cm->max_buff, cm->pool);
max_buff = cm->max_buff;
if (max_buff == MLXSW_SP_SB_INFI) {
err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
min_buff, 0,
true, cm->pool_index);
} else {
if (mlxsw_sp_sb_pool_is_static(mlxsw_sp,
cm->pool_index))
max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
max_buff);
err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
min_buff, max_buff,
false, cm->pool_index);
}
if (err)
return err;
}
@ -448,91 +513,74 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp)
.max_buff = _max_buff, \
}
static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = {
static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = {
/* Ingress pools. */
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
};
#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress)
static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = {
/* Egress pools. */
MLXSW_SP_SB_PM(0, 7),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(10000, 90000),
};
#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress)
#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms)
static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
enum mlxsw_reg_sbxx_dir dir,
const struct mlxsw_sp_sb_pm *pms,
size_t pms_len)
static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
int i;
int err;
for (i = 0; i < pms_len; i++) {
const struct mlxsw_sp_sb_pm *pm;
for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) {
const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i];
u32 max_buff;
u32 min_buff;
pm = &pms[i];
err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir,
pm->min_buff, pm->max_buff);
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff);
max_buff = pm->max_buff;
if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i))
max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff);
err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port,
i, min_buff, max_buff);
if (err)
return err;
}
return 0;
}
static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
int err;
err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_port->local_port,
MLXSW_REG_SBXX_DIR_INGRESS,
mlxsw_sp_sb_pms_ingress,
MLXSW_SP_SB_PMS_INGRESS_LEN);
if (err)
return err;
return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
mlxsw_sp_port->local_port,
MLXSW_REG_SBXX_DIR_EGRESS,
mlxsw_sp_sb_pms_egress,
MLXSW_SP_SB_PMS_EGRESS_LEN);
}
struct mlxsw_sp_sb_mm {
u32 min_buff;
u32 max_buff;
u8 pool;
u16 pool_index;
};
#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \
{ \
.min_buff = _min_buff, \
.max_buff = _max_buff, \
.pool = _pool, \
.pool_index = _pool, \
}
static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(20000, 0xff, 0),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
MLXSW_SP_SB_MM(0, 6, 4),
};
#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
@ -544,16 +592,18 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
int err;
for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) {
const struct mlxsw_sp_sb_pool_des *des;
const struct mlxsw_sp_sb_mm *mc;
u32 min_buff;
mc = &mlxsw_sp_sb_mms[i];
/* All pools are initialized using dynamic thresholds,
* therefore 'max_buff' isn't specified in cells.
des = &mlxsw_sp_sb_pool_dess[mc->pool_index];
/* All pools used by sb_mm's are initialized using dynamic
* thresholds, therefore 'max_buff' isn't specified in cells.
*/
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff);
mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff,
mc->pool);
des->pool);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl);
if (err)
return err;
@ -561,9 +611,24 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
return 0;
}
/* Split the pool descriptor table into ingress and egress counts.
 *
 * Scans mlxsw_sp_sb_pool_dess for the first egress entry. Everything before
 * it is counted as ingress and everything from it to the end of the table as
 * egress. If the table holds no egress entry, a warning is emitted and the
 * whole table is reported as ingress.
 */
static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len)
{
	int first_egress = 0;

	while (first_egress < MLXSW_SP_SB_POOL_DESS_LEN &&
	       mlxsw_sp_sb_pool_dess[first_egress].dir !=
	       MLXSW_REG_SBXX_DIR_EGRESS)
		first_egress++;

	/* Falling off the end of the table means no egress pool was found. */
	WARN(first_egress == MLXSW_SP_SB_POOL_DESS_LEN, "No egress pools\n");

	*p_ingress_len = first_egress;
	*p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - first_egress;
}
int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
{
u64 sb_size;
u16 ing_pool_count;
u16 eg_pool_count;
int err;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
@ -571,17 +636,19 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
return -EIO;
sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE);
mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL);
if (!mlxsw_sp->sb)
return -ENOMEM;
mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
MAX_BUFFER_SIZE);
err = mlxsw_sp_sb_ports_init(mlxsw_sp);
if (err)
goto err_sb_ports_init;
err = mlxsw_sp_sb_prs_init(mlxsw_sp);
err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs,
MLXSW_SP_SB_PRS_LEN);
if (err)
goto err_sb_prs_init;
err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
@ -590,11 +657,13 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
err = mlxsw_sp_sb_mms_init(mlxsw_sp);
if (err)
goto err_sb_mms_init;
err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size,
MLXSW_SP_SB_POOL_COUNT,
MLXSW_SP_SB_POOL_COUNT,
MLXSW_SP_SB_TC_COUNT,
MLXSW_SP_SB_TC_COUNT);
mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count);
err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0,
mlxsw_sp->sb->sb_size,
ing_pool_count,
eg_pool_count,
MLXSW_SP_SB_ING_TC_COUNT,
MLXSW_SP_SB_EG_TC_COUNT);
if (err)
goto err_devlink_sb_register;
@ -632,36 +701,15 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
static u8 pool_get(u16 pool_index)
{
return pool_index % MLXSW_SP_SB_POOL_COUNT;
}
static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir)
{
u16 pool_index;
pool_index = pool;
if (dir == MLXSW_REG_SBXX_DIR_EGRESS)
pool_index += MLXSW_SP_SB_POOL_COUNT;
return pool_index;
}
static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index)
{
return pool_index < MLXSW_SP_SB_POOL_COUNT ?
MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS;
}
int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info)
{
enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir;
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
u8 pool = pool_get(pool_index);
enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
struct mlxsw_sp_sb_pr *pr;
pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
pool_info->pool_type = (enum devlink_sb_pool_type) dir;
pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
@ -674,34 +722,32 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
u8 pool = pool_get(pool_index);
enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
enum mlxsw_reg_sbpr_mode mode;
if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
return -EINVAL;
mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
pool_size, false);
}
#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */
static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool,
enum mlxsw_reg_sbxx_dir dir, u32 max_buff)
static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
u32 max_buff)
{
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC)
return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff);
}
static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
enum mlxsw_reg_sbxx_dir dir, u32 threshold,
u32 *p_max_buff)
static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
u32 threshold, u32 *p_max_buff)
{
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) {
int val;
@ -725,12 +771,10 @@ int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
u8 pool = pool_get(pool_index);
enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
pool, dir);
pool_index);
*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir,
*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index,
pm->max_buff);
return 0;
}
@ -743,17 +787,15 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
u8 pool = pool_get(pool_index);
enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
u32 max_buff;
int err;
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
threshold, &max_buff);
if (err)
return err;
return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir,
return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index,
0, max_buff);
}
@ -771,9 +813,9 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
pg_buff, dir);
*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir,
*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index,
cm->max_buff);
*p_pool_index = pool_index_get(cm->pool, dir);
*p_pool_index = cm->pool_index;
return 0;
}
@ -788,24 +830,24 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
u8 local_port = mlxsw_sp_port->local_port;
u8 pg_buff = tc_index;
enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
u8 pool = pool_get(pool_index);
u32 max_buff;
int err;
if (dir != dir_get(pool_index))
if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir)
return -EINVAL;
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
threshold, &max_buff);
if (err)
return err;
return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
0, max_buff, pool);
return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff,
0, max_buff, false, pool_index);
}
#define MASKED_COUNT_MAX \
(MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2))
(MLXSW_REG_SBSR_REC_MAX_COUNT / \
(MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT))
struct mlxsw_sp_sb_sr_occ_query_cb_ctx {
u8 masked_count;
@ -831,7 +873,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) {
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_INGRESS);
mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@ -845,7 +887,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) {
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_EGRESS);
mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@ -880,23 +922,17 @@ next_batch:
local_port_1 = local_port;
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, false);
for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
}
for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) {
err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_INGRESS,
&bulk_list);
if (err)
goto out;
err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_EGRESS,
&bulk_list);
if (err)
goto out;
@ -945,23 +981,17 @@ next_batch:
local_port++;
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, true);
for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
}
for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) {
err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_INGRESS,
&bulk_list);
if (err)
goto out;
err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_EGRESS,
&bulk_list);
if (err)
goto out;
@ -994,10 +1024,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
u8 pool = pool_get(pool_index);
enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
pool, dir);
pool_index);
*p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur);
*p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max);

View File

@ -0,0 +1,347 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +-----------------------+                +----------------------------------+
# | H1                    |                | H2                               |
# |                       |                |  unicast --> + $h2.111           |
# |                       |                |  traffic     | 192.0.2.129/28    |
# |          multicast    |                |              | e-qos-map 0:1     |
# |          traffic      |                |              |                   |
# | $h1 + <-----          |                |              + $h2               |
# +-----|-----------------+                +--------------|-------------------+
#       |                                                 |
# +-----|-------------------------------------------------|-------------------+
# |     + $swp1                                           + $swp2             |
# |     | >1Gbps                                          | >1Gbps            |
# | +---|----------------+                     +----------|----------------+  |
# | |   + $swp1.1        |                     |          + $swp2.111      |  |
# | |                BR1 |             SW      | BR111                     |  |
# | |   + $swp3.1        |                     |          + $swp3.111      |  |
# | +---|----------------+                     +----------|----------------+  |
# |     \_________________________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+
# Names of the test functions defined in this script; presumably consumed by
# tests_run from lib.sh (not shown here) -- confirm there.
ALL_TESTS="
ping_ipv4
test_mc_aware
"
# Shared forwarding selftest library, located relative to this script.
lib_dir=$(dirname $0)/../../../net/forwarding
# setup_prepare reads six interfaces, NETIFS[p1] through NETIFS[p6].
NUM_NETIFS=6
source $lib_dir/lib.sh
# Set up host interface $h1, from which test_mc_aware sends the broadcast
# flood, and raise its MTU to 10000 so that the 8K test frames fit.
# simple_if_init comes from lib.sh (not shown here).
h1_create()
{
simple_if_init $h1
mtu_set $h1 10000
}
# Undo h1_create in reverse order: restore the MTU saved by mtu_set, then
# tear down the interface setup.
h1_destroy()
{
mtu_restore $h1
simple_if_fini $h1
}
# Set up host interface $h2, the unicast sender: raise the MTU to 10000 and
# create VLAN 111 on top of it with address 192.0.2.129/28.
h2_create()
{
simple_if_init $h2
mtu_set $h2 10000
vlan_create $h2 111 v$h2 192.0.2.129/28
# Map priority 0 to PCP 1 on egress so that unicast traffic is tagged with
# PCP 1 and shows up in the rx_octets_prio_1 counters used for measurement.
ip link set dev $h2.111 type vlan egress-qos-map 0:1
}
# Undo h2_create in reverse order: remove VLAN 111, restore the MTU, and tear
# down the interface setup.
h2_destroy()
{
vlan_destroy $h2 111
mtu_restore $h2
simple_if_fini $h2
}
# Set up host interface $h3, the receiver whose RX counters are sampled:
# raise the MTU to 10000 and create VLAN 111 with address 192.0.2.130/28.
h3_create()
{
simple_if_init $h3
mtu_set $h3 10000
vlan_create $h3 111 v$h3 192.0.2.130/28
}
# Undo h3_create in reverse order: remove VLAN 111, restore the MTU, and tear
# down the interface setup.
h3_destroy()
{
vlan_destroy $h3 111
mtu_restore $h3
simple_if_fini $h3
}
# Configure the switch side of the topology: bring up the three switch ports
# with a 10000-byte MTU, make $swp3 a 1Gbps egress bottleneck with a flat
# priority mapping, and build the two bridges that forward traffic to $swp3.
switch_create()
{
ip link set dev $swp1 up
mtu_set $swp1 10000
ip link set dev $swp2 up
mtu_set $swp2 10000
ip link set dev $swp3 up
mtu_set $swp3 10000
vlan_create $swp2 111
vlan_create $swp3 111
# Pin $swp3 to 1Gbps so that it becomes the bottleneck for both flows.
ethtool -s $swp3 speed 1000 autoneg off
# Map every priority to the same band (7) so that the tagged unicast
# traffic is not prioritized over the untagged multicast traffic.
tc qdisc replace dev $swp3 root handle 3: \
prio bands 8 priomap 7 7 7 7 7 7 7 7
# br1 carries the untagged traffic between $swp1 and $swp3.
ip link add name br1 type bridge vlan_filtering 0
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp3 master br1
# br111 carries the VLAN 111 traffic between $swp2.111 and $swp3.111.
ip link add name br111 type bridge vlan_filtering 0
ip link set dev br111 up
ip link set dev $swp2.111 master br111
ip link set dev $swp3.111 master br111
}
# Undo switch_create: delete both bridges, remove the prio qdisc, re-enable
# autonegotiation on $swp3, remove the VLAN devices, and restore the MTU of
# each switch port before bringing it down.
switch_destroy()
{
ip link del dev br111
ip link del dev br1
tc qdisc del dev $swp3 root handle 3:
ethtool -s $swp3 autoneg on
vlan_destroy $swp3 111
vlan_destroy $swp2 111
mtu_restore $swp3
ip link set dev $swp3 down
mtu_restore $swp2
ip link set dev $swp2 down
mtu_restore $swp1
ip link set dev $swp1 down
}
# Bind the six NETIFS entries to the roles used throughout this script
# (p1/p2 = $h1/$swp1, p3/p4 = $swp2/$h2, p5/p6 = $swp3/$h3), then build the
# host and switch configuration.
setup_prepare()
{
h1=${NETIFS[p1]}
swp1=${NETIFS[p2]}
swp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
swp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
# MAC address of $h3; measure_uc_rate uses it as the destination MAC of the
# unicast stream.
h3mac=$(mac_get $h3)
vrf_prepare
h1_create
h2_create
h3_create
switch_create
}
# Tear down everything setup_prepare created, in reverse order. Installed as
# the EXIT trap at the bottom of this script. pre_cleanup and vrf_cleanup come
# from lib.sh (not shown here).
cleanup()
{
pre_cleanup
switch_destroy
h3_destroy
h2_destroy
h1_destroy
vrf_cleanup
}
# Connectivity sanity check: run the lib.sh ping_test helper from $h2 towards
# 192.0.2.130, the address configured on $h3.111 by h3_create.
ping_ipv4()
{
ping_test $h2 192.0.2.130
}
humanize()
{
local speed=$1; shift
for unit in bps Kbps Mbps Gbps; do
if (($(echo "$speed < 1024" | bc))); then
break
fi
speed=$(echo "scale=1; $speed / 1024" | bc)
done
echo "$speed${unit}"
}
# Convert a byte-counter delta into an average bit rate.
#
# $1 - counter value (octets) at the start of the interval
# $2 - counter value (octets) at the end of the interval
# $3 - interval length in seconds
#
# Prints 8 * ($2 - $1) / $3 using integer arithmetic.
rate()
{
	local start=$1; shift
	local end=$1; shift
	local seconds=$1; shift
	local bits=$((8 * (end - start)))

	echo $((bits / seconds))
}
# Check that a measured rate is above a required minimum.
#
# $1 - measured rate
# $2 - minimum that the rate must exceed
# $3 - label describing what was measured, used in the failure message
#
# Returns 0 when rate > min. Otherwise prints "<label> <rate> < <min>" (both
# values formatted by humanize) to stderr and returns 1.
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	# Report the values that were actually compared, not variables that
	# happen to exist in the caller's scope.
	echo "$what $(humanize $rate) < $(humanize $min)" > /dev/stderr
	return 1
}
# Measure unicast throughput while a mausezahn stream runs from $h2.111
# towards $h3.
#
# $1 - label for the measurement, used in diagnostics printed by check_rate
#
# Starts a background mausezahn UDP stream, then samples the rx_octets_prio_1
# counters of $swp2 (ingress) and $h3 (egress) over 10-second intervals. A
# measurement is accepted as soon as the ingress rate exceeds min_ingress; up
# to six attempts are made.
#
# Prints "<ingress rate> <egress rate>" of the last measurement on stdout.
# Returns 0 if an acceptable ingress rate was reached and 1 otherwise. Meant
# to be called in a command substitution, so that the background job it
# starts and kills belongs to that subshell.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local i
	local ret=0

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	sleep 1

	for i in {5..0}; do
		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		local ir=$(rate $u0 $u1 $interval)
		local er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	echo $ir $er
	# Use return rather than exit: inside a command substitution the
	# resulting status is the same, but a direct call no longer terminates
	# the whole test script.
	return $ret
}
# Verify that unicast throughput survives a broadcast flood.
#
# First measures unicast throughput on its own, then starts a broadcast
# mausezahn stream from $h1 and measures unicast throughput again. The test
# fails if either measurement could not reach the required ingress rate, or if
# the egress unicast throughput degraded by more than 10%. Finishes by
# printing a report that includes the multicast ingress and egress rates seen
# during the second measurement.
test_mc_aware()
{
	RET=0

	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	# Index 0 is the ingress rate, index 1 the egress rate.
	local ucth1=${uc_rate[1]}

	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &

	# Bracket the second unicast measurement with timestamps and prio-0
	# counter snapshots to derive the multicast rates over the same period.
	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Degradation in percent, clamped at zero when throughput improved.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")
	check_err $(bc <<< "$deg > 10") \
		"UC throughput degraded by more than 10% under MC overload"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "UC performance under MC overload"

	echo "UC-only throughput $(humanize $ucth1)"
	echo "UC+MC throughput $(humanize $ucth2)"
	echo "Degradation $deg %"
	echo
	echo "Full report:"
	echo " UC only:"
	echo " ingress UC throughput $(humanize ${uc_rate[0]})"
	echo " egress UC throughput $(humanize ${uc_rate[1]})"
	echo " UC+MC:"
	echo " ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
	echo " ingress MC throughput $(humanize $mc_ir)"
	echo " egress MC throughput $(humanize $mc_er)"
}
# Run cleanup whenever the script exits so that the topology is torn down on
# failure as well as on success.
trap cleanup EXIT
setup_prepare
setup_wait
tests_run
# EXIT_STATUS is not set in this script; presumably maintained by lib.sh.
exit $EXIT_STATUS

View File

@ -494,6 +494,14 @@ tc_rule_stats_get()
| jq '.[1].options.actions[].stats.packets'
}
# Print the value of one ethtool statistic of a device.
#
# $1 - device name
# $2 - statistic name as it appears in "ethtool -S" output
#
# Only the first line matching the name is used. The value is printed exactly
# as it follows the colon, so leading whitespace is not stripped.
ethtool_stats_get()
{
	local dev=$1; shift
	local stat=$1; shift
	# Anchor on optional leading spaces and the trailing colon so that the
	# name does not match in the middle of a longer counter name.
	local pattern="^ *$stat:"

	ethtool -S $dev | grep "$pattern" | head -n 1 | cut -d: -f2
}
mac_get()
{
local if_name=$1
@ -541,6 +549,23 @@ forwarding_restore()
sysctl_restore net.ipv4.conf.all.forwarding
}
# MTU each device had before mtu_set changed it, keyed by device name. Read
# back by mtu_restore.
declare -A MTU_ORIG
# Change the MTU of a device, remembering the previous value.
#
# $1 - device name
# $2 - new MTU
#
# The current MTU is read from the JSON output of "ip link show" and stored
# in MTU_ORIG under the device name before the new MTU is applied.
mtu_set()
{
	local dev=$1; shift
	local mtu=$1; shift
	local current_mtu

	current_mtu=$(ip -j link show dev $dev | jq -e '.[].mtu')
	MTU_ORIG["$dev"]=$current_mtu

	ip link set dev $dev mtu $mtu
}
# Set the MTU of a device back to the value recorded in MTU_ORIG.
#
# $1 - device name
mtu_restore()
{
	local dev=$1; shift
	local saved_mtu=${MTU_ORIG["$dev"]}

	ip link set dev $dev mtu $saved_mtu
}
tc_offload_check()
{
local num_netifs=${1:-$NUM_NETIFS}