From a9f36656b519a9a21309793c306941a3cd0eeb8f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:24 +0300 Subject: [PATCH 01/13] mlxsw: spectrum: Init shaper for TCs 8..15 With introduction of MC-aware mode to mlxsw, it became necessary to configure TCs above 7 as well. There is now code in mlxsw to disable ETS for these higher classes, but disablement of max shaper was neglected. By default, max shaper is currently disabled to begin with, so the problem is just cosmetic. However, for symmetry, do like we do for ETS configuration, and call mlxsw_sp_port_ets_maxrate_set() for both TC i and i + 8. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 930700413b1d0..1fc20263b15b8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2804,6 +2804,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_MAS_DIS); if (err) return err; + + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; } /* Map all priorities to traffic class 0. */ From 3a4dbfb044ea37e9cabb1e2499859e423a96ab15 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:25 +0300 Subject: [PATCH 02/13] mlxsw: spectrum_buffers: Use devlink pool indices throughout Currently, mlxsw assumes that each ingress pool has its egress counterpart, and that pool index for purposes of caching matches the index with which the hardware should be configured. As we want to expose the MC pool, both of these assumptions break. Instead, maintain the pool index as long as possible. Unify ingress and egress caches and use the pool index as cache index as well. Only translate to FW pool numbering when actually packing the registers. This simplifies things considerably, as the pool index is the only quantity necessary to uniquely identify a pool, and the pool/direction split is not necessary until firmware is talked to. To support the mapping between pool indices and pool numbers and directions, which is not neatly mathematical anymore, introduce a pool descriptor table, indexed by pool index, to facilitate the translation. Include the MC pool in the descriptor table as well, so that it can be referenced from mlxsw_sp_sb_cms_egress. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 375 ++++++++---------- 1 file changed, 170 insertions(+), 205 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 3589432d16437..c3e69cba28cf4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -25,7 +25,7 @@ struct mlxsw_cp_sb_occ { struct mlxsw_sp_sb_cm { u32 min_buff; u32 max_buff; - u8 pool; + u16 pool_index; struct mlxsw_cp_sb_occ occ; }; @@ -35,16 +35,35 @@ struct mlxsw_sp_sb_pm { struct mlxsw_cp_sb_occ occ; }; -#define MLXSW_SP_SB_POOL_COUNT 4 +struct mlxsw_sp_sb_pool_des { + enum mlxsw_reg_sbxx_dir dir; + u8 pool; +}; + +/* Order ingress pools before egress pools. 
*/ +static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, +}; + +#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess) + #define MLXSW_SP_SB_TC_COUNT 8 struct mlxsw_sp_sb_port { struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; + struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN]; }; struct mlxsw_sp_sb { - struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; + struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN]; struct mlxsw_sp_sb_port *ports; u32 cell_size; }; @@ -60,10 +79,9 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) } static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, - u8 pool, - enum mlxsw_reg_sbxx_dir dir) + u16 pool_index) { - return &mlxsw_sp->sb->prs[dir][pool]; + return &mlxsw_sp->sb->prs[pool_index]; } static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, @@ -74,81 +92,87 @@ static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 pool, - enum mlxsw_reg_sbxx_dir dir) + u8 local_port, u16 pool_index) { - return &mlxsw_sp->sb->ports[local_port].pms[dir][pool]; + return &mlxsw_sp->sb->ports[local_port].pms[pool_index]; } -static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, enum mlxsw_reg_sbpr_mode mode, u32 size) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpr_pl[MLXSW_REG_SBPR_LEN]; struct mlxsw_sp_sb_pr *pr; int err; - mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size); + mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode, size); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); if (err) return err; - pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pr->mode = mode; pr->size = size; return 0; } static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff, u8 pool) + u8 pg_buff, u32 min_buff, u32 max_buff, + u16 pool_index) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbcm_pl[MLXSW_REG_SBCM_LEN]; int err; - mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir, - min_buff, max_buff, pool); + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir, + min_buff, max_buff, des->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); if (err) return err; if (pg_buff < MLXSW_SP_SB_TC_COUNT) { struct mlxsw_sp_sb_cm *cm; - cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, + des->dir); cm->min_buff = min_buff; cm->max_buff = max_buff; - cm->pool = pool; + cm->pool_index = pool_index; } return 0; } static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff) + u16 pool_index, u32 min_buff, u32 max_buff) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; 
char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; int err; - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false, min_buff, max_buff); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); if (err) return err; - pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); pm->min_buff = min_buff; pm->max_buff = max_buff; return 0; } static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - struct list_head *bulk_list) + u16 pool_index, struct list_head *bulk_list) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + true, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, bulk_list, NULL, 0); } @@ -163,14 +187,16 @@ static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, } static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - struct list_head *bulk_list) + u16 pool_index, struct list_head *bulk_list) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; - pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0); + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + false, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, bulk_list, mlxsw_sp_sb_pm_occ_query_cb, @@ -254,30 +280,26 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) .size = _size, \ } -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { + /* Ingress pools. */ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), -}; - -#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) - -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { + /* Egress pools. 
*/ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), }; -#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress) +#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs) -static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_reg_sbxx_dir dir, - const struct mlxsw_sp_sb_pr *prs, - size_t prs_len) +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sb_pr *prs, + size_t prs_len) { int i; int err; @@ -285,32 +307,18 @@ static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < prs_len; i++) { u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size); - err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, size); if (err) return err; } return 0; } -static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_prs_ingress, - MLXSW_SP_SB_PRS_INGRESS_LEN); - if (err) - return err; - return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_prs_egress, - MLXSW_SP_SB_PRS_EGRESS_LEN); -} - #define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ .max_buff = _max_buff, \ - .pool = _pool, \ + .pool_index = _pool, \ } static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { @@ -329,38 +337,38 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { #define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(1, 0xff, 0), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(1, 0xff, 4), }; #define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) -#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0) +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), 
MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, @@ -405,12 +413,16 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; + if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir)) + continue; + /* All pools are initialized using dynamic thresholds, * therefore 'max_buff' isn't specified in cells. */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); - err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, - min_buff, cm->max_buff, cm->pool); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, cm->max_buff, + cm->pool_index); if (err) return err; } @@ -448,91 +460,67 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) .max_buff = _max_buff, \ } -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = { +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { + /* Ingress pools. */ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), -}; - -#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress) - -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = { + /* Egress pools. */ MLXSW_SP_SB_PM(0, 7), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), }; -#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress) +#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) -static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, - enum mlxsw_reg_sbxx_dir dir, - const struct mlxsw_sp_sb_pm *pms, - size_t pms_len) +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int i; int err; - for (i = 0; i < pms_len; i++) { - const struct mlxsw_sp_sb_pm *pm; + for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; - pm = &pms[i]; - err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir, - pm->min_buff, pm->max_buff); + err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port, + i, pm->min_buff, pm->max_buff); if (err) return err; } return 0; } -static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - int err; - - err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_pms_ingress, - MLXSW_SP_SB_PMS_INGRESS_LEN); - if (err) - return err; - return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_pms_egress, - MLXSW_SP_SB_PMS_EGRESS_LEN); -} - struct mlxsw_sp_sb_mm { u32 min_buff; u32 max_buff; - u8 pool; + u16 pool_index; }; #define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ .max_buff = _max_buff, \ - .pool = _pool, \ + .pool_index = _pool, \ } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - 
MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(20000, 0xff, 4), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -544,16 +532,18 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) int err; for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { + const struct mlxsw_sp_sb_pool_des *des; const struct mlxsw_sp_sb_mm *mc; u32 min_buff; mc = &mlxsw_sp_sb_mms[i]; + des = &mlxsw_sp_sb_pool_dess[mc->pool_index]; /* All pools are initialized using dynamic thresholds, * therefore 'max_buff' isn't specified in cells. */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, - mc->pool); + des->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) return err; @@ -561,8 +551,24 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len) +{ + int i; + + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; ++i) + if (mlxsw_sp_sb_pool_dess[i].dir == MLXSW_REG_SBXX_DIR_EGRESS) + goto out; + WARN(1, "No egress pools\n"); + +out: + *p_ingress_len = i; + *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i; +} + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { + u16 ing_pool_count; + u16 eg_pool_count; u64 sb_size; int err; @@ -581,7 +587,8 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) goto err_sb_ports_init; - err = mlxsw_sp_sb_prs_init(mlxsw_sp); + err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs, + MLXSW_SP_SB_PRS_LEN); if (err) goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); @@ -590,9 +597,10 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) goto err_sb_mms_init; + mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count); err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_POOL_COUNT, + ing_pool_count, + eg_pool_count, MLXSW_SP_SB_TC_COUNT, MLXSW_SP_SB_TC_COUNT); if (err) @@ -632,36 +640,15 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } -static u8 pool_get(u16 pool_index) -{ - return pool_index % MLXSW_SP_SB_POOL_COUNT; -} - -static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir) -{ - u16 pool_index; - - pool_index = pool; - if (dir == MLXSW_REG_SBXX_DIR_EGRESS) - pool_index += MLXSW_SP_SB_POOL_COUNT; - return pool_index; -} - -static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index) -{ - return pool_index < MLXSW_SP_SB_POOL_COUNT ? 
- MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS; -} - int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) { + enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr; + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; @@ -674,34 +661,31 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size); - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); enum mlxsw_reg_sbpr_mode mode; if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EINVAL; mode = (enum mlxsw_reg_sbpr_mode) threshold_type; - return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, pool_size); } #define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ -static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, u32 max_buff) +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 max_buff) { - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff); } -static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, u32 threshold, - u32 *p_max_buff) +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 threshold, u32 *p_max_buff) { - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { int val; @@ -725,12 +709,10 @@ int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, - pool, dir); + pool_index); - *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir, + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index, pm->max_buff); return 0; } @@ -743,17 +725,15 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); u32 max_buff; int err; - err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff); if (err) return err; - return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir, + return 
mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index, 0, max_buff); } @@ -771,9 +751,9 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); - *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, dir); + *p_pool_index = cm->pool_index; return 0; } @@ -788,20 +768,19 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; - u8 pool = pool_get(pool_index); u32 max_buff; int err; - if (dir != dir_get(pool_index)) + if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir) return -EINVAL; - err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff); if (err) return err; - return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, - 0, max_buff, pool); + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, + 0, max_buff, pool_index); } #define MASKED_COUNT_MAX \ @@ -889,14 +868,8 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_INGRESS, - &bulk_list); - if (err) - goto out; - err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_EGRESS, &bulk_list); if (err) goto out; @@ -954,14 +927,8 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { - err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_INGRESS, - &bulk_list); - if (err) - goto out; + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_EGRESS, &bulk_list); if (err) goto out; @@ -994,10 +961,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, - pool, dir); + pool_index); *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur); *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max); From 5f95d20b3a5ed571e14f9007c4346ebac76a0f58 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:26 +0300 Subject: [PATCH 03/13] mlxsw: spectrum_buffers: Split TC_COUNT into ingress and egress Current code assumes that ingress and egress has the same number of traffic classes. Since the introduction of MC-aware mode that assumption hasn't held anymore, and there have been 16 TCs on the egress as opposed to 8 on ingress. Break the assumption of symmetry by splitting the artifacts related to shared-buffer TC counting to ingress and egress parts. 
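Concretely, there are now MLXSW_SP_SB_ING_TC_COUNT = 8 ingress and
MLXSW_SP_SB_EG_TC_COUNT = 16 egress traffic classes, so a single SBSR
occupancy query yields 8 + 16 = 24 records per port, which is what the
updated MASKED_COUNT_MAX computation accounts for.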
Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c3e69cba28cf4..7f29000c10a10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -55,10 +55,12 @@ static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { #define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess) -#define MLXSW_SP_SB_TC_COUNT 8 +#define MLXSW_SP_SB_ING_TC_COUNT 8 +#define MLXSW_SP_SB_EG_TC_COUNT 16 struct mlxsw_sp_sb_port { - struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT]; + struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT]; struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN]; }; @@ -84,11 +86,25 @@ static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, return &mlxsw_sp->sb->prs[pool_index]; } +static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) +{ + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return pg_buff < MLXSW_SP_SB_ING_TC_COUNT; + else + return pg_buff < MLXSW_SP_SB_EG_TC_COUNT; +} + static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) { - return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff]; + struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port]; + + WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir)); + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return &sb_port->ing_cms[pg_buff]; + else + return &sb_port->eg_cms[pg_buff]; } static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, @@ -131,7 +147,7 @@ static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); if (err) return err; - if (pg_buff < MLXSW_SP_SB_TC_COUNT) { + if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) { struct mlxsw_sp_sb_cm *cm; cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, @@ -601,8 +617,8 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, ing_pool_count, eg_pool_count, - MLXSW_SP_SB_TC_COUNT, - MLXSW_SP_SB_TC_COUNT); + MLXSW_SP_SB_ING_TC_COUNT, + MLXSW_SP_SB_EG_TC_COUNT); if (err) goto err_devlink_sb_register; @@ -784,7 +800,8 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, } #define MASKED_COUNT_MAX \ - (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2)) + (MLXSW_REG_SBSR_REC_MAX_COUNT / \ + (MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT)) struct mlxsw_sp_sb_sr_occ_query_cb_ctx { u8 masked_count; @@ -810,7 +827,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_INGRESS); mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, @@ -824,7 +841,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - for (i = 0; i < 
MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_EGRESS); mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, @@ -859,10 +876,10 @@ int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, local_port_1 = local_port; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); - } for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; @@ -918,10 +935,10 @@ int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, local_port++; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); - } for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; From fe07d723c00db23c1683a994fd57cc9b2aa9aaa2 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:27 +0300 Subject: [PATCH 04/13] mlxsw: spectrum_buffers: Keep shared buffer size in mlxsw_sp_sb Entities of infinite size will be reported as if they had the maximum size allowed by the chip. To that end, keep track of maximum shared buffer size in mlxsw_sp->sb. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 7f29000c10a10..81f3af6e80aff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -68,6 +68,7 @@ struct mlxsw_sp_sb { struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN]; struct mlxsw_sp_sb_port *ports; u32 cell_size; + u64 sb_size; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -585,7 +586,6 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { u16 ing_pool_count; u16 eg_pool_count; - u64 sb_size; int err; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) @@ -593,12 +593,13 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EIO; - sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); if (!mlxsw_sp->sb) return -ENOMEM; mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_BUFFER_SIZE); err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) @@ -614,7 +615,8 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_sb_mms_init; mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count); - err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, + err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + mlxsw_sp->sb->sb_size, ing_pool_count, eg_pool_count, MLXSW_SP_SB_ING_TC_COUNT, From f0024f0d98519f7ff5640ab6f11f102594985038 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 
09:21:28 +0300 Subject: [PATCH 05/13] mlxsw: spectrum_buffers: Allow pools of infinite size The MC pool should have an infinite size (i.e. no quota). To that end, add infi_size to the SBPR register and extend mlxsw_reg_sbpr_pack(). Also add MLXSW_SP_SB_INFI to denote buffers that should have an infinite size. Change mlxsw_sp_sb_pr_write() to take as parameter byte size, instead of cell size, and add the special handling of infinite buffers. Report pools with infinite size as if they actually take the full shared buffer size. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 11 +++++++- .../mellanox/mlxsw/spectrum_buffers.c | 27 ++++++++++++++----- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6e8b619b769b4..694f6a443769a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -8336,8 +8336,15 @@ MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2); */ MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4); +/* reg_sbpr_infi_size + * Size is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1); + /* reg_sbpr_size * Pool size in buffer cells. + * Reserved when infi_size = 1. * Access: RW */ MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24); @@ -8355,13 +8362,15 @@ MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, enum mlxsw_reg_sbxx_dir dir, - enum mlxsw_reg_sbpr_mode mode, u32 size) + enum mlxsw_reg_sbpr_mode mode, u32 size, + bool infi_size) { MLXSW_REG_ZERO(sbpr, payload); mlxsw_reg_sbpr_pool_set(payload, pool); mlxsw_reg_sbpr_dir_set(payload, dir); mlxsw_reg_sbpr_mode_set(payload, mode); mlxsw_reg_sbpr_size_set(payload, size); + mlxsw_reg_sbpr_infi_size_set(payload, infi_size); } /* SBCM - Shared Buffer Class Management Register diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 81f3af6e80aff..646022fa4cbe8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -29,6 +29,8 @@ struct mlxsw_sp_sb_cm { struct mlxsw_cp_sb_occ occ; }; +#define MLXSW_SP_SB_INFI -1U + struct mlxsw_sp_sb_pm { u32 min_buff; u32 max_buff; @@ -115,7 +117,8 @@ static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, - enum mlxsw_reg_sbpr_mode mode, u32 size) + enum mlxsw_reg_sbpr_mode mode, + u32 size, bool infi_size) { const struct mlxsw_sp_sb_pool_des *des = &mlxsw_sp_sb_pool_dess[pool_index]; @@ -123,11 +126,14 @@ static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, struct mlxsw_sp_sb_pr *pr; int err; - mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode, size); + mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode, + size, infi_size); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); if (err) return err; + if (infi_size) + size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size); pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pr->mode = mode; pr->size = size; @@ -322,9 +328,17 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, int err; for (i = 0; i < prs_len; i++) { - u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size); - - err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, size); + u32 
size = prs[i].size; + u32 size_cells; + + if (size == MLXSW_SP_SB_INFI) { + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + 0, true); + } else { + size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + size_cells, false); + } if (err) return err; } @@ -685,7 +699,8 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, return -EINVAL; mode = (enum mlxsw_reg_sbpr_mode) threshold_type; - return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, pool_size); + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, + pool_size, false); } #define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ From d144e3a2c9f79af5202d832e85fb72b0d6954ef5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:29 +0300 Subject: [PATCH 06/13] mlxsw: spectrum_buffers: Allow an infinite maximum for per-TC pool limit The SBCM register configures the shared buffer configuration according to port and TC. So far all pools have had a dynamic size, where the infinite size is easy to express by using max_buff of 0xff. However the MC pool should be configured with static size, and the infinite size thus needs to be set using the field SBCM.infi_max. Therefore add the field infi_max to the SBCM register and to mlxsw_reg_sbcm_pack(). Extend mlxsw_sp_sb_cm_write() to handle infinite sizes as well. Report infinite pool limits as if the limit actually were the total shared buffer size. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 11 ++++++++- .../mellanox/mlxsw/spectrum_buffers.c | 23 +++++++++++++------ 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 694f6a443769a..df81e0a1eb648 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -8418,6 +8418,12 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 +/* reg_sbcm_infi_max + * Max buffer is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1); + /* reg_sbcm_max_buff * When the pool associated to the port-pg/tclass is configured to * static, Maximum buffer size for the limiter configured in cells. @@ -8427,6 +8433,7 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); * 0: 0 * i: (1/128)*2^(i-1), for i=1..14 * 0xFF: Infinity + * Reserved when infi_max = 1. 
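+ * For a dynamic pool, a max_buff of 6 thus encodes an alpha of
+ * (1/128)*2^(6-1) = 1/4, and a max_buff of 14 an alpha of 64.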
* Access: RW */ MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); @@ -8439,7 +8446,8 @@ MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff, u8 pool) + u32 min_buff, u32 max_buff, + bool infi_max, u8 pool) { MLXSW_REG_ZERO(sbcm, payload); mlxsw_reg_sbcm_local_port_set(payload, local_port); @@ -8447,6 +8455,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, mlxsw_reg_sbcm_dir_set(payload, dir); mlxsw_reg_sbcm_min_buff_set(payload, min_buff); mlxsw_reg_sbcm_max_buff_set(payload, max_buff); + mlxsw_reg_sbcm_infi_max_set(payload, infi_max); mlxsw_reg_sbcm_pool_set(payload, pool); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 646022fa4cbe8..de51f567a76b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -142,20 +142,24 @@ static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 pg_buff, u32 min_buff, u32 max_buff, - u16 pool_index) + bool infi_max, u16 pool_index) { const struct mlxsw_sp_sb_pool_des *des = &mlxsw_sp_sb_pool_dess[pool_index]; char sbcm_pl[MLXSW_REG_SBCM_LEN]; + struct mlxsw_sp_sb_cm *cm; int err; mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir, - min_buff, max_buff, des->pool); + min_buff, max_buff, infi_max, des->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); if (err) return err; + if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) { - struct mlxsw_sp_sb_cm *cm; + if (infi_max) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + mlxsw_sp->sb->sb_size); cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, des->dir); @@ -451,9 +455,14 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, * therefore 'max_buff' isn't specified in cells. */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); - err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, - min_buff, cm->max_buff, - cm->pool_index); + if (cm->max_buff == MLXSW_SP_SB_INFI) + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, 0, + true, cm->pool_index); + else + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, cm->max_buff, + false, cm->pool_index); if (err) return err; } @@ -813,7 +822,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, return err; return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, - 0, max_buff, pool_index); + 0, max_buff, false, pool_index); } #define MASKED_COUNT_MAX \ From 41057e28288b89da91d535ff3244bfcf90ebc97c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:30 +0300 Subject: [PATCH 07/13] mlxsw: spectrum_buffers: Pass SBPM min_size in cells The SBPM register configures the shared buffer allocation and configuration per port and pool. The min_buff value is the buffer size dedicated to this single function, and is configured in cells. Currently, all sb_pm entries have 0 for min_buff, and therefore the actual unit is immaterial. However, in a follow-up patch we want to add entries with non-zero minimum. Therefore pass the min_buff from the sb_pm table through the cell conversion before handing it over to mlxsw_sp_sb_pm_write(). Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index de51f567a76b9..021b7c219f4e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -523,9 +523,11 @@ static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; + u32 min_buff; + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff); err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port, - i, pm->min_buff, pm->max_buff); + i, min_buff, pm->max_buff); if (err) return err; } From 5be3637e069fe70a9ba6094970edfabaae1d0f1c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:31 +0300 Subject: [PATCH 08/13] mlxsw: spectrum_buffers: Allow configuration of static pools Some pools configured through the sb_pm entries may have by default static size. The MC pool is now not explicitly configured, however it gets configured as static implicitly by 0-initializing sb->prs, and a follow-up patch adds an explicit configuration to the same effect. To support this, pass max_buff taken from sb_pm and sb_cm entries through cell conversion before handing it to mlxsw_sp_sb_pm_write(), if the pool that the sb_pm entry configures is statically-sized. To keep current behavior, update mlxsw_sp_sb_cms_egress[] to denote buffer sizes in bytes (assuming Spectrum 1 cell sizes, which the original code assumed as well) instead of cells. Note that a follow-up patch changes this to infinite size. Also tweak a comment at SBMM configuration to remain true now that statically-sized pools exist. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 021b7c219f4e5..c7b453f7881cd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -380,14 +380,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), - MLXSW_SP_SB_CM(0, 140000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, 13440000, 8), MLXSW_SP_SB_CM(1, 0xff, 4), }; @@ -433,6 +433,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { #define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) +static bool +mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + + return pr->mode == MLXSW_REG_SBPR_MODE_STATIC; +} + static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, enum mlxsw_reg_sbxx_dir dir, const struct mlxsw_sp_sb_cm *cms, @@ -444,6 +452,7 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; u32 min_buff; + u32 max_buff; if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ @@ -451,18 +460,21 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir)) continue; - /* All pools are initialized using dynamic thresholds, - * therefore 'max_buff' isn't specified in cells. 
- */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); - if (cm->max_buff == MLXSW_SP_SB_INFI) + max_buff = cm->max_buff; + if (max_buff == MLXSW_SP_SB_INFI) { err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, min_buff, 0, true, cm->pool_index); - else + } else { + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, + cm->pool_index)) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + max_buff); err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, - min_buff, cm->max_buff, + min_buff, max_buff, false, cm->pool_index); + } if (err) return err; } @@ -523,11 +535,15 @@ static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; + u32 max_buff; u32 min_buff; min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff); + max_buff = pm->max_buff; + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i)) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff); err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port, - i, min_buff, pm->max_buff); + i, min_buff, max_buff); if (err) return err; } @@ -580,8 +596,8 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) mc = &mlxsw_sp_sb_mms[i]; des = &mlxsw_sp_sb_pool_dess[mc->pool_index]; - /* All pools are initialized using dynamic thresholds, - * therefore 'max_buff' isn't specified in cells. + /* All pools used by sb_mm's are initialized using dynamic + * thresholds, therefore 'max_buff' isn't specified in cells. */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, From e83c045e53d75c6b231fd5d5753d271c3e51e56d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:32 +0300 Subject: [PATCH 09/13] mlxsw: spectrum_buffers: Configure MC pool Pool 15 (indexed as 8) is dedicated to MC traffic. Its configuration has been kept at default, because the table-based configuration wasn't expressive enough to allow the explicit configuration. Now that the configuration of pool 15 can be described, do so. The MC pool should have infinite size, infinite per-TC quota, and per-port limit of 90K. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlxsw/spectrum_buffers.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index c7b453f7881cd..7b9f79c7c0256 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -320,6 +320,7 @@ static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; #define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs) @@ -380,14 +381,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), MLXSW_SP_SB_CM(1500, 9, 4), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), - MLXSW_SP_SB_CM(0, 13440000, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), MLXSW_SP_SB_CM(1, 0xff, 4), }; @@ -523,6 +524,7 @@ static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(10000, 90000), }; #define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) From 6a23f9a49722c391c5624ad1790997cd6b6b8fef Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:33 +0300 Subject: [PATCH 10/13] mlxsw: spectrum_buffers: Tweak SBMM configuration The SBMM register configures shared buffer allocation and settings for MC packets according to switch priority. The recommended values are no reserved buffer and alpha of 1/4, which corresponds to buf_max of 6. Update mlxsw_sp_sb_mms accordingly. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- .../mellanox/mlxsw/spectrum_buffers.c | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 7b9f79c7c0256..12c61e0cc5705 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -566,21 +566,21 @@ struct mlxsw_sp_sb_mm { } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), - MLXSW_SP_SB_MM(20000, 0xff, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) From 3136a3698804f55a26f68e8c897d7ccf254923ad Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:34 +0300 Subject: [PATCH 11/13] selftests: forwarding: lib: Add ethtool_stats_get() Add a new service function to obtain ethtool counters. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/lib.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ca53b539aa2d1..4c2351996a7f3 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -494,6 +494,14 @@ tc_rule_stats_get() | jq '.[1].options.actions[].stats.packets' } +ethtool_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 +} + mac_get() { local if_name=$1 From a381ed12ea33888a08f0e3e5ba22a255fa5f7257 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:35 +0300 Subject: [PATCH 12/13] selftests: forwarding: lib: Add mtu_set(), mtu_restore() Some selftests need to tweak MTU of an interface, and naturally should at teardown restore the MTU back to the original value. Add two functions to facilitate this MTU handling: mtu_set() to change MTU value, and mtu_reset() to change it back to what it was before. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/forwarding/lib.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 4c2351996a7f3..0e73698b20489 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -549,6 +549,23 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +declare -A MTU_ORIG +mtu_set() +{ + local dev=$1; shift + local mtu=$1; shift + + MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu') + ip link set dev $dev mtu $mtu +} + +mtu_restore() +{ + local dev=$1; shift + + ip link set dev $dev mtu ${MTU_ORIG["$dev"]} +} + tc_offload_check() { local num_netifs=${1:-$NUM_NETIFS} From b5638d46c90a6998859a4fc9ea697b264da96092 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 20 Sep 2018 09:21:36 +0300 Subject: [PATCH 13/13] selftests: mlxsw: Add a test for UC behavior under MC flood A so-called "MC-aware" mode has recently been enabled in mlxsw. In MC-aware mode, BUM traffic is handled in a special way so that when a switch is flooded with BUM, UC performance isn't unduly impacted. Without enablement of this mode, a stream of BUM traffic can cause sustained UC throughput drop in excess of 99 %. Add a test for this behavior. Compare how much UC throughput degrades as a stream of broadcast frames floods the switch. A minimal degradation is tolerated to cover for glitches in traffic injection performance. Signed-off-by: Petr Machata Reviewed-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_mc_aware.sh | 347 ++++++++++++++++++ 1 file changed, 347 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh new file mode 100644 index 0000000000000..0150bb2741eb1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for switch behavior under MC overload. An issue in Spectrum chips +# causes throughput of UC traffic to drop severely when a switch is under heavy +# MC load. This issue can be overcome by putting the switch to MC-aware mode. +# This test verifies that UC performance stays intact even as the switch is +# under MC flood, and therefore that the MC-aware mode is enabled and correctly +# configured. +# +# Because mlxsw throttles CPU port, the traffic can't actually reach userspace +# at full speed. That makes it impossible to use iperf3 to simply measure the +# throughput, because many packets (that reach $h3) don't get to the kernel at +# all even in UDP mode (the situation is even worse in TCP mode, where one can't +# hope to see more than a couple Mbps). +# +# So instead we send traffic with mausezahn and use RX ethtool counters at $h3. +# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore +# each gets a different priority and we can use per-prio ethtool counters to +# measure the throughput. In order to avoid prioritizing unicast traffic, prio +# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and +# thus TC 0). +# +# Mausezahn can't actually saturate the links unless it's using large frames. +# Thus we set MTU to 10K on all involved interfaces. Then both unicast and +# multicast traffic uses 8K frames. 
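+#
+# Rates are computed from pairs of octet-counter snapshots taken a fixed
+# interval apart, i.e. rate [bits/s] = 8 * (t1 - t0) / interval, which is
+# what the rate() helper below implements.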
From b5638d46c90a6998859a4fc9ea697b264da96092 Mon Sep 17 00:00:00 2001
From: Petr Machata
Date: Thu, 20 Sep 2018 09:21:36 +0300
Subject: [PATCH 13/13] selftests: mlxsw: Add a test for UC behavior under MC
 flood

A so-called "MC-aware" mode has recently been enabled in mlxsw. In
MC-aware mode, BUM traffic is handled in a special way so that when a
switch is flooded with BUM, UC performance isn't unduly impacted.
Without enablement of this mode, a stream of BUM traffic can cause a
sustained UC throughput drop in excess of 99%.

Add a test for this behavior: compare how much UC throughput degrades
as a stream of broadcast frames floods the switch. A minimal
degradation is tolerated, to cover for glitches in traffic injection
performance.

Signed-off-by: Petr Machata
Reviewed-by: Jiri Pirko
Signed-off-by: Ido Schimmel
Signed-off-by: David S. Miller
---
 .../drivers/net/mlxsw/qos_mc_aware.sh         | 347 ++++++++++++++++++
 1 file changed, 347 insertions(+)
 create mode 100644 tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100644
index 0000000000000..0150bb2741eb1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under
+# heavy MC load. This issue can be overcome by putting the switch into
+# MC-aware mode. This test verifies that UC performance stays intact even as
+# the switch is under MC flood, and therefore that the MC-aware mode is
+# enabled and correctly configured.
+#
+# Because mlxsw throttles the CPU port, the traffic can't actually reach
+# userspace at full speed. That makes it impossible to use iperf3 to simply
+# measure the throughput, because many packets (that reach $h3) don't get to
+# the kernel at all even in UDP mode (the situation is even worse in TCP
+# mode, where one can't hope to see more than a couple of Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at
+# $h3. Multicast traffic is untagged, unicast traffic is tagged with PCP 1.
+# Therefore each gets a different priority and we can use per-prio ethtool
+# counters to measure the throughput. In order to avoid prioritizing unicast
+# traffic, a prio qdisc is installed on $swp3 and maps all priorities to the
+# same band #7 (and thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large
+# frames. Thus we set MTU to 10K on all involved interfaces. Then both
+# unicast and multicast traffic use 8K frames.
+#
+# +-----------------------+                +----------------------------------+
+# | H1                    |                |                               H2 |
+# |                       |                |  unicast --> + $h2.111           |
+# |                       |                |  traffic     | 192.0.2.129/28    |
+# | multicast             |                |              | e-qos-map 0:1     |
+# | traffic               |                |              |                   |
+# | $h1 + <-----          |                |              + $h2               |
+# +-----|-----------------+                +--------------|-------------------+
+#       |                                                 |
+# +-----|-------------------------------------------------|-------------------+
+# |     + $swp1                                           + $swp2             |
+# |     | >1Gbps                                          | >1Gbps            |
+# | +---|----------------+                     +----------|----------------+  |
+# | |   + $swp1.1        |                     |          + $swp2.111      |  |
+# | |                BR1 |         SW          | BR111                     |  |
+# | |   + $swp3.1        |                     |          + $swp3.111      |  |
+# | +---|----------------+                     +----------|----------------+  |
+# |     \_________________________________________________/                   |
+# |                          |                                                |
+# |                          + $swp3                                          |
+# |                          | 1Gbps bottleneck                               |
+# |                          | prio qdisc: {0..7} -> 7                        |
+# +--------------------------|-------------------------------------------------+
+#                            |
+#                         +--|-----------------+
+#                         |  + $h3          H3 |
+#                         |  |                 |
+#                         |  + $h3.111         |
+#                         |    192.0.2.130/28  |
+#                         +--------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_mc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+}
+
+h1_destroy()
+{
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.129/28
+	ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+
+	vlan_create $swp2 111
+	vlan_create $swp3 111
+
+	ethtool -s $swp3 speed 1000 autoneg off
+	tc qdisc replace dev $swp3 root handle 3: \
+	   prio bands 8 priomap 7 7 7 7 7 7 7 7
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp3 master br1
+
+	ip link add name br111 type bridge vlan_filtering 0
+	ip link set dev br111 up
+	ip link set dev $swp2.111 master br111
+	ip link set dev $swp3.111 master br111
+}
+
+switch_destroy()
+{
+	ip link del dev br111
+	ip link del dev br1
+
+	tc qdisc del dev $swp3 root handle 3:
+	ethtool -s $swp3 autoneg on
+
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 111
+
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h2 192.0.2.130
+}
+
+humanize()
+{
+	local speed=$1; shift
+
+	for unit in bps Kbps Mbps Gbps; do
+		if (($(echo "$speed < 1024" | bc))); then
+			break
+		fi
+
+		speed=$(echo "scale=1; $speed / 1024" | bc)
+	done
+
+	echo "$speed${unit}"
+}
+
+rate()
+{
+	local t0=$1; shift
+	local t1=$1; shift
+	local interval=$1; shift
+
+	echo $((8 * (t1 - t0) / interval))
+}
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+
+	if ((rate > min)); then
+		return 0
+	fi
+
+	echo "$what $(humanize $rate) < $(humanize $min)" > /dev/stderr
+	return 1
+}
+
+measure_uc_rate()
+{
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+
+	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+	sleep 1
+
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+		sleep $interval
+		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	echo $ir $er
+	exit $ret
+}
+
+test_mc_aware()
+{
+	RET=0
+
+	local -a uc_rate
+	uc_rate=($(measure_uc_rate "UC-only"))
+	check_err $? "Could not get high enough UC-only ingress rate"
+	local ucth1=${uc_rate[1]}
+
+	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	local -a uc_rate_2
+	uc_rate_2=($(measure_uc_rate "UC+MC"))
+	check_err $? "Could not get high enough UC+MC ingress rate"
+	local ucth2=${uc_rate_2[1]}
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+
+	# Relative UC throughput degradation in percent, clamped at zero.
+	local deg=$(bc <<< "
+			scale=2
+			ret = 100 * ($ucth1 - $ucth2) / $ucth1
+			if (ret > 0) { ret } else { 0 }
+		")
+	# bc prints 1 when the comparison holds; fail on more than 10 %.
+	check_err $(bc <<< "$deg > 10")
+
+	local interval=$((d1 - d0))
+	local mc_ir=$(rate $u0 $u1 $interval)
+	local mc_er=$(rate $t0 $t1 $interval)
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "UC performance under MC overload"
+
+	echo "UC-only throughput $(humanize $ucth1)"
+	echo "UC+MC throughput   $(humanize $ucth2)"
+	echo "Degradation        $deg %"
+	echo
+	echo "Full report:"
+	echo "  UC only:"
+	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
+	echo "  UC+MC:"
+	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
+	echo "    ingress MC throughput $(humanize $mc_ir)"
+	echo "    egress MC throughput  $(humanize $mc_er)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
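[For reference, the arithmetic that rate() and humanize() perform, worked through on made-up counter samples; the numbers are illustrative, not measured. Note that humanize() steps by 1024, so it reports binary-prefixed units:]

	# Two octet-counter samples taken 10 s apart:
	rate 1000000000 3500000000 10   # (3500000000 - 1000000000) * 8 / 10 = 2000000000 bps
	humanize 2000000000             # repeated /1024 with scale=1 yields "1.8Gbps"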
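[Likewise, the degradation check in test_mc_aware(), on assumed throughputs: with ucth1=2000000000 and ucth2=1900000000 the drop is 5.00 %, under the 10 % tolerance, so the bc comparison prints 0 and check_err() records a pass:]

	ucth1=2000000000
	ucth2=1900000000
	deg=$(bc <<< "
		scale=2
		ret = 100 * ($ucth1 - $ucth2) / $ucth1
		if (ret > 0) { ret } else { 0 }
	")
	echo $deg            # 5.00
	bc <<< "$deg > 10"   # 0, i.e. within tolerance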