Skip to content

Commit

Permalink
Merge branch 'mlxsw-devlink-shared-buffers'
Browse files Browse the repository at this point in the history
Jiri Pirko says:

====================
devlink + mlxsw: add support for config and control of shared buffers

ASICs implement a shared buffer for packet forwarding purposes and enable
flexible partitioning of the shared buffer for different flows and ports,
enabling non-blocking progress of different flows as well as separation
of lossy traffic from lossless traffic when using Per-Priority Flow
Control (PFC). The shared buffer optimizes buffer utilization for better
absorption of packet bursts.

This patchset implements an API based on the model that SAI uses. That model
is aligned with multiple ASIC vendors, so the API should be vendor-neutral.

Userspace counterpart patchset for devlink iproute2 tool can be found here:
https://github.com/jpirko/iproute2_mlxsw/tree/devlink_sb

A couple of usage examples:

switch$ devlink sb help
Usage: devlink sb show [ DEV [ sb SB_INDEX ] ]
       devlink sb pool show [ DEV [ sb SB_INDEX ] pool POOL_INDEX ]
       devlink sb pool set DEV [ sb SB_INDEX ] pool POOL_INDEX
                           size POOL_SIZE thtype { static | dynamic }
       devlink sb port pool show [ DEV/PORT_INDEX [ sb SB_INDEX ]
                                   pool POOL_INDEX ]
       devlink sb port pool set DEV/PORT_INDEX [ sb SB_INDEX ]
                                pool POOL_INDEX th THRESHOLD
       devlink sb tc bind show [ DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX ]
       devlink sb tc bind set DEV/PORT_INDEX [ sb SB_INDEX ] tc TC_INDEX
                              type { ingress | egress } pool POOL_INDEX
                              th THRESHOLD
       devlink sb occupancy show { DEV | DEV/PORT_INDEX } [ sb SB_INDEX ]
       devlink sb occupancy snapshot DEV [ sb SB_INDEX ]
       devlink sb occupancy clearmax DEV [ sb SB_INDEX ]

switch$ devlink sb show
pci/0000:03:00.0: sb 0 size 16777216 ing_pools 4 eg_pools 4 ing_tcs 8 eg_tcs 8

switch$ devlink sb pool show
pci/0000:03:00.0: sb 0 pool 0 type ingress size 12400032 thtype dynamic
pci/0000:03:00.0: sb 0 pool 1 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 2 type ingress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 3 type ingress size 200064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 4 type egress size 13220064 thtype dynamic
pci/0000:03:00.0: sb 0 pool 5 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 6 type egress size 0 thtype dynamic
pci/0000:03:00.0: sb 0 pool 7 type egress size 0 thtype dynamic

switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 16

switch$ sudo devlink sb port pool set sw0p7 pool 0 th 15

switch$ devlink sb port pool show sw0p7 pool 0
sw0p7: sb 0 pool 0 threshold 15

switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 10

switch$ sudo devlink sb tc bind set sw0p7 tc 0 type ingress pool 0 th 9

switch$ devlink sb tc bind show sw0p7 tc 0 type ingress
sw0p7: sb 0 tc 0 type ingress pool 0 threshold 9

switch$ sudo devlink sb occupancy snapshot pci/0000:03:00.0

switch$ devlink sb occupancy show sw0p7
sw0p7:
  pool: 0:      82944/3217344 1:          0/0       2:          0/0       3:          0/0
        4:          0/384     5:          0/0       6:          0/0       7:          0/0
  itc:  0(0):   96768/3217344 1(0):       0/0       2(0):       0/0       3(0):       0/0
        4(0):       0/0       5(0):       0/0       6(0):       0/0       7(0):       0/0
  etc:  0(4):       0/384     1(4):       0/0       2(4):       0/0       3(4):       0/0
        4(4):       0/0       5(4):       0/0       6(4):       0/0       7(4):       0/0

switch$ sudo devlink sb occupancy clearmax pci/0000:03:00.0
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 14, 2016
2 parents f38ba95 + 2d0ed39 commit cb68926
Show file tree
Hide file tree
Showing 10 changed files with 2,787 additions and 522 deletions.
682 changes: 505 additions & 177 deletions drivers/net/ethernet/mellanox/mlxsw/core.c

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions drivers/net/ethernet/mellanox/mlxsw/core.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/workqueue.h>
#include <net/devlink.h>

#include "trap.h"
Expand Down Expand Up @@ -108,6 +109,19 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
const struct mlxsw_event_listener *el,
void *priv);

typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
size_t payload_len, unsigned long cb_priv);

int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload,
struct list_head *bulk_list,
mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv);
int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list);

int mlxsw_reg_query(struct mlxsw_core *mlxsw_core,
const struct mlxsw_reg_info *reg, char *payload);
int mlxsw_reg_write(struct mlxsw_core *mlxsw_core,
Expand Down Expand Up @@ -137,11 +151,22 @@ struct mlxsw_core_port {
struct devlink_port devlink_port;
};

static inline void *
mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
{
	/* The driver's port structure is guaranteed to start with its
	 * mlxsw_core_port member, so the address of the core port is also
	 * the address of the driver's private port data.
	 */
	void *driver_priv = mlxsw_core_port;

	return driver_priv;
}

int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
struct net_device *dev, bool split, u32 split_group);
void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port);

int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);

#define MLXSW_CONFIG_PROFILE_SWID_COUNT 8

struct mlxsw_swid_config {
Expand Down Expand Up @@ -200,6 +225,37 @@ struct mlxsw_driver {
int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port,
unsigned int count);
int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port);
int (*sb_pool_get)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info);
int (*sb_pool_set)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type);
int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_threshold);
int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 threshold);
int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 *p_pool_index, u32 *p_threshold);
int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u16 pool_index, u32 threshold);
int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core,
unsigned int sb_index);
int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 pool_index,
u32 *p_cur, u32 *p_max);
int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
u32 *p_cur, u32 *p_max);
void (*txhdr_construct)(struct sk_buff *skb,
const struct mlxsw_tx_info *tx_info);
u8 txhdr_len;
Expand Down
135 changes: 134 additions & 1 deletion drivers/net/ethernet/mellanox/mlxsw/reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -3566,6 +3566,10 @@ MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2);
*/
MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);

/* shared max_buff limits for dynamic threshold for SBCM, SBPM */
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14

/* reg_sbcm_max_buff
* When the pool associated to the port-pg/tclass is configured to
* static, Maximum buffer size for the limiter configured in cells.
Expand Down Expand Up @@ -3632,6 +3636,27 @@ MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4);
*/
MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2);

/* reg_sbpm_buff_occupancy
* Current buffer occupancy in cells.
* Access: RO
*/
MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24);

/* reg_sbpm_clr
* Clear Max Buffer Occupancy
* When this bit is set, max_buff_occupancy field is cleared (and a
* new max value is tracked from the time the clear was performed).
* Access: OP
*/
MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1);

/* reg_sbpm_max_buff_occupancy
* Maximum value of buffer occupancy in cells monitored. Cleared by
* writing to the clr field.
* Access: RO
*/
MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24);

/* reg_sbpm_min_buff
* Minimum buffer size for the limiter, in cells.
* Access: RW
Expand All @@ -3652,17 +3677,25 @@ MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24);
MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24);

/* Fill an SBPM register payload.
 *
 * Runs MLXSW_REG_ZERO on the payload first, then sets the local_port, pool,
 * dir, clr, min_buff and max_buff items from the corresponding arguments.
 *
 * NOTE(review): the two consecutive "enum mlxsw_reg_sbxx_dir dir" parameter
 * lines below look like the removed and added sides of a diff hunk; only the
 * one that also carries "bool clr" matches the body - confirm which line
 * belongs in the real file.
 */
static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool,
enum mlxsw_reg_sbxx_dir dir,
enum mlxsw_reg_sbxx_dir dir, bool clr,
u32 min_buff, u32 max_buff)
{
MLXSW_REG_ZERO(sbpm, payload);
mlxsw_reg_sbpm_local_port_set(payload, local_port);
mlxsw_reg_sbpm_pool_set(payload, pool);
mlxsw_reg_sbpm_dir_set(payload, dir);
mlxsw_reg_sbpm_clr_set(payload, clr);
mlxsw_reg_sbpm_min_buff_set(payload, min_buff);
mlxsw_reg_sbpm_max_buff_set(payload, max_buff);
}

/* Read the occupancy counters out of an SBPM register payload.
 *
 * payload:              SBPM payload to read from.
 * p_buff_occupancy:     receives the buff_occupancy item.
 * p_max_buff_occupancy: receives the max_buff_occupancy item.
 *
 * Both output pointers are written unconditionally.
 */
static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy,
					 u32 *p_max_buff_occupancy)
{
	*p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload);
	*p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload);
}

/* SBMM - Shared Buffer Multicast Management Register
* --------------------------------------------------
* The SBMM register configures and retrieves the shared buffer allocation
Expand Down Expand Up @@ -3718,6 +3751,104 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff,
mlxsw_reg_sbmm_pool_set(payload, pool);
}

/* SBSR - Shared Buffer Status Register
* ------------------------------------
* The SBSR register retrieves the shared buffer occupancy according to
* Port-Pool. Note that this register enables reading a large amount of data.
* It is the user's responsibility to limit the amount of data to ensure the
* response fits within the maximum transfer unit. If the response exceeds
* the maximum transfer unit, it is truncated with no special notice.
*/
#define MLXSW_REG_SBSR_ID 0xB005
#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */
#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */
#define MLXSW_REG_SBSR_REC_MAX_COUNT 120
#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \
MLXSW_REG_SBSR_REC_LEN * \
MLXSW_REG_SBSR_REC_MAX_COUNT)

/* Register descriptor for SBSR: pairs the register ID with the payload
 * length, which is sized for the base part plus MLXSW_REG_SBSR_REC_MAX_COUNT
 * records.
 */
static const struct mlxsw_reg_info mlxsw_reg_sbsr = {
.id = MLXSW_REG_SBSR_ID,
.len = MLXSW_REG_SBSR_LEN,
};

/* reg_sbsr_clr
* Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy
* field is cleared (and a new max value is tracked from the time the clear
* was performed).
* Access: OP
*/
MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1);

/* reg_sbsr_ingress_port_mask
* Bit vector for all ingress network ports.
* Indicates which of the ports (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other port
* does not change.
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1);

/* reg_sbsr_pg_buff_mask
* Bit vector for all switch priority groups.
* Indicates which of the priorities (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other priority
* does not change.
* Range is 0..cap_max_pg_buffers - 1
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1);

/* reg_sbsr_egress_port_mask
* Bit vector for all egress network ports.
* Indicates which of the ports (for which the relevant bit is set)
* are affected by the set operation. Configuration of any other port
* does not change.
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1);

/* reg_sbsr_tclass_mask
* Bit vector for all traffic classes.
* Indicates which of the traffic classes (for which the relevant bit is
* set) are affected by the set operation. Configuration of any other
* traffic class does not change.
* Range is 0..cap_max_tclass - 1
* Access: Index
*/
MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1);

/* Prepare an SBSR register payload.
 *
 * Runs MLXSW_REG_ZERO on the payload and then sets only the clr item from
 * the clr argument. No port, PG or traffic class mask bits are set here.
 */
static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr)
{
	MLXSW_REG_ZERO(sbsr, payload);
	mlxsw_reg_sbsr_clr_set(payload, clr);
}

/* reg_sbsr_rec_buff_occupancy
* Current buffer occupancy in cells.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false);

/* reg_sbsr_rec_max_buff_occupancy
* Maximum value of buffer occupancy in cells monitored. Cleared by
* writing to the clr field.
* Access: RO
*/
MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN,
0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false);

/* Read one occupancy record out of an SBSR register payload.
 *
 * payload:              SBSR payload to read from.
 * rec_index:            index of the record to read; it is passed to the
 *                       item getters as-is, with no range check here.
 * p_buff_occupancy:     receives the record's rec_buff_occupancy item.
 * p_max_buff_occupancy: receives the record's rec_max_buff_occupancy item.
 */
static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index,
					     u32 *p_buff_occupancy,
					     u32 *p_max_buff_occupancy)
{
	*p_buff_occupancy =
		mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index);
	*p_max_buff_occupancy =
		mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index);
}

static inline const char *mlxsw_reg_id_str(u16 reg_id)
{
switch (reg_id) {
Expand Down Expand Up @@ -3813,6 +3944,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "SBPM";
case MLXSW_REG_SBMM_ID:
return "SBMM";
case MLXSW_REG_SBSR_ID:
return "SBSR";
default:
return "*UNKNOWN*";
}
Expand Down
32 changes: 22 additions & 10 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum.c
Original file line number Diff line number Diff line change
Expand Up @@ -2434,6 +2434,7 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,

err_switchdev_init:
err_lag_init:
mlxsw_sp_buffers_fini(mlxsw_sp);
err_buffers_init:
err_flood_init:
mlxsw_sp_traps_fini(mlxsw_sp);
Expand All @@ -2448,6 +2449,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);

mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
Expand Down Expand Up @@ -2491,16 +2493,26 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
};

/* Driver descriptor for the Spectrum device kind. It plugs the mlxsw_sp_*
 * init/fini, port split/unsplit, shared buffer (sb_*) and TX header
 * callbacks, together with the config profile, into struct mlxsw_driver.
 *
 * NOTE(review): every member from .kind through .profile other than the
 * sb_* ones appears twice below with the same value; this looks like the
 * removed and added sides of a re-alignment diff hunk - confirm that only
 * one set belongs in the real file.
 */
static struct mlxsw_driver mlxsw_sp_driver = {
.kind = MLXSW_DEVICE_KIND_SPECTRUM,
.owner = THIS_MODULE,
.priv_size = sizeof(struct mlxsw_sp),
.init = mlxsw_sp_init,
.fini = mlxsw_sp_fini,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp_config_profile,
.kind = MLXSW_DEVICE_KIND_SPECTRUM,
.owner = THIS_MODULE,
.priv_size = sizeof(struct mlxsw_sp),
.init = mlxsw_sp_init,
.fini = mlxsw_sp_fini,
.port_split = mlxsw_sp_port_split,
.port_unsplit = mlxsw_sp_port_unsplit,
.sb_pool_get = mlxsw_sp_sb_pool_get,
.sb_pool_set = mlxsw_sp_sb_pool_set,
.sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
.sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
.sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
.sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
.sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
.sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
.sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
.sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
.txhdr_construct = mlxsw_sp_txhdr_construct,
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp_config_profile,
};

static int
Expand Down
Loading

0 comments on commit cb68926

Please sign in to comment.