From 016f426a14f09faa8bdb68b063c2947edf3108a1 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:09 +0300 Subject: [PATCH 01/14] net/mlx5: qos: Flesh out element_attributes in mlx5_ifc.h This is used for multiple purposes, depending on the scheduling element created. There are a few helper structs defined a long time ago, but they are not easy to find in the file and they are about to get new members. This commit cleans up this area a bit by: - moving the helper structs closer to where they are relevant. - defining a helper union to include all of them to help discoverability. - making use of it everywhere element_attributes is used. - using a consistent 'attr' name. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 18 +++-- include/linux/mlx5/mlx5_ifc.h | 67 ++++++++++--------- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 02a3563f51ad2..7154eeff4fd49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -339,7 +339,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group = vport->qos.group; struct mlx5_core_dev *dev = esw->dev; u32 parent_tsar_ix; - void *vport_elem; + void *attr; int err; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT)) @@ -348,8 +348,8 @@ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); - vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); - MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, attr, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); @@ -443,8 +443,8 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; - __be32 *attr; u32 divider; + void *attr; int err; group = kzalloc(sizeof(*group), GFP_KERNEL); @@ -453,12 +453,10 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); - MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, esw->qos.root_tsar_ix); + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, esw->qos.root_tsar_ix); + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); err = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, @@ -559,7 +557,7 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; - __be32 *attr; + void *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) @@ -573,7 +571,7 @@ static int
esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 96d369112bfa0..c79ba61976732 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4105,11 +4105,47 @@ enum { ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, }; +enum { + TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, + TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, + TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, +}; + +enum { + TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, + TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, + TSAR_TYPE_CAP_MASK_ETS = 1 << 2, +}; + +struct mlx5_ifc_tsar_element_bits { + u8 reserved_at_0[0x8]; + u8 tsar_type[0x8]; + u8 reserved_at_10[0x10]; +}; + +struct mlx5_ifc_vport_element_bits { + u8 reserved_at_0[0x10]; + u8 vport_number[0x10]; +}; + +struct mlx5_ifc_vport_tc_element_bits { + u8 traffic_class[0x4]; + u8 reserved_at_4[0xc]; + u8 vport_number[0x10]; +}; + +union mlx5_ifc_element_attributes_bits { + struct mlx5_ifc_tsar_element_bits tsar; + struct mlx5_ifc_vport_element_bits vport; + struct mlx5_ifc_vport_tc_element_bits vport_tc; + u8 reserved_at_0[0x20]; +}; + struct mlx5_ifc_scheduling_context_bits { u8 element_type[0x8]; u8 reserved_at_8[0x18]; - u8 element_attributes[0x20]; + union mlx5_ifc_element_attributes_bits element_attributes; u8 parent_element_id[0x20]; @@ -4798,35 +4834,6 @@ struct mlx5_ifc_register_loopback_control_bits { u8 reserved_at_20[0x60]; }; -struct mlx5_ifc_vport_tc_element_bits { - u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; - u8 vport_number[0x10]; -}; - -struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; - u8 vport_number[0x10]; -}; - -enum { - TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, - TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, - TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, -}; - -enum { - TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, - TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, - TSAR_TYPE_CAP_MASK_ETS = 1 << 2, -}; - -struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; - u8 tsar_type[0x8]; - u8 reserved_at_10[0x10]; -}; - enum { MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0, MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1, From 158205ca4bafa98deeee977bb5de20de7d573285 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:10 +0300 Subject: [PATCH 02/14] net/mlx5: qos: Rename vport 'tsar' into 'sched_elem'. Vports do not use TSARs (Transmit Scheduling ARbiters), which are used for grouping multiple entities together. Use the correct name in variables and functions for clarity. Also move the scheduling context to a local variable in the esw_qos_sched_elem_config function instead of an empty parameter that needs to be provided by all callers. There is no functional change here. 
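For illustration, a call site that previously had to declare and pass a scratch scheduling context (a sketch assembled from the hunks below, not a verbatim quote):

	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};

	err = esw_qos_tsar_config(dev, sched_ctx, group->tsar_ix, max_rate, bw_share);

now reduces to:

	err = esw_qos_sched_elem_config(dev, group->tsar_ix, max_rate, bw_share);

because esw_qos_sched_elem_config() declares the context on its own stack.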
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../mlx5/core/esw/diag/qos_tracepoint.h | 16 ++++----- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 35 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++-- 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h index 1ce332f21ebe9..0ebbd699903d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -15,14 +15,14 @@ TRACE_EVENT(mlx5_esw_vport_qos_destroy, TP_ARGS(vport), TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) __field(unsigned short, vport_id) - __field(unsigned int, tsar_ix) + __field(unsigned int, sched_elem_ix) ), TP_fast_assign(__assign_str(devname); __entry->vport_id = vport->vport; - __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix; ), - TP_printk("(%s) vport=%hu tsar_ix=%u\n", - __get_str(devname), __entry->vport_id, __entry->tsar_ix + TP_printk("(%s) vport=%hu sched_elem_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->sched_elem_ix ) ); @@ -31,20 +31,20 @@ DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, TP_ARGS(vport, bw_share, max_rate), TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) __field(unsigned short, vport_id) - __field(unsigned int, tsar_ix) + __field(unsigned int, sched_elem_ix) __field(unsigned int, bw_share) __field(unsigned int, max_rate) __field(void *, group) ), TP_fast_assign(__assign_str(devname); __entry->vport_id = vport->vport; - __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix; __entry->bw_share = bw_share; __entry->max_rate = max_rate; __entry->group = vport->qos.group; ), - TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", - __get_str(devname), __entry->vport_id, __entry->tsar_ix, + TP_printk("(%s) vport=%hu sched_elem_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->sched_elem_ix, __entry->bw_share, __entry->max_rate, __entry->group ) ); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 7154eeff4fd49..73127f1dbf6e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -22,9 +22,10 @@ struct mlx5_esw_rate_group { struct list_head list; }; -static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, - u32 tsar_ix, u32 max_rate, u32 bw_share) +static int esw_qos_sched_elem_config(struct mlx5_core_dev *dev, u32 sched_elem_ix, + u32 max_rate, u32 bw_share) { + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; u32 bitmask = 0; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) @@ -38,20 +39,17 @@ static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, sched_ctx, - tsar_ix, + sched_elem_ix, bitmask); } static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; int err; - err = esw_qos_tsar_config(dev, sched_ctx, - group->tsar_ix, - max_rate, 
bw_share); + err = esw_qos_sched_elem_config(dev, group->tsar_ix, max_rate, bw_share); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); @@ -65,20 +63,18 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; int err; if (!vport->qos.enabled) return -EIO; - err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, - max_rate, bw_share); + err = esw_qos_sched_elem_config(dev, vport->qos.esw_sched_elem_ix, max_rate, bw_share); if (err) { esw_warn(esw->dev, - "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + "E-Switch modify vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); - NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify vport scheduling element failed"); return err; } @@ -357,9 +353,10 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, sched_ctx, - &vport->qos.esw_tsar_ix); + &vport->qos.esw_sched_elem_ix); if (err) { - esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + esw_warn(vport->dev, + "E-Switch create vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); return err; } @@ -378,9 +375,9 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_tsar_ix); + vport->qos.esw_sched_elem_ix); if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy vport scheduling element failed"); return err; } @@ -683,9 +680,9 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_tsar_ix); + vport->qos.esw_sched_elem_ix); if (err) - esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + esw_warn(esw->dev, "E-Switch destroy vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); memset(&vport->qos, 0, sizeof(vport->qos)); @@ -809,7 +806,7 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 err = mlx5_modify_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, - vport->qos.esw_tsar_ix, + vport->qos.esw_sched_elem_ix, bitmask); } mutex_unlock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f44b4c7ebcfd7..9bf05ae58af0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -213,9 +213,9 @@ struct mlx5_vport { struct mlx5_vport_info info; struct { - bool enabled; - u32 esw_tsar_ix; - u32 bw_share; + bool enabled; + u32 esw_sched_elem_ix; + u32 bw_share; u32 min_rate; u32 max_rate; struct mlx5_esw_rate_group *group; From 16efefde21f50b15ccc01e1993d578d34b201611 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:11 +0300 Subject: [PATCH 03/14] net/mlx5: qos: Consistently name vport vars as 'vport' The current mixture of 'vport' and 'evport' can be improved. There is no functional change. 
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 73127f1dbf6e1..8be4980fcc616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -88,7 +88,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, bool group_level) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - struct mlx5_vport *evport; + struct mlx5_vport *vport; u32 max_guarantee = 0; unsigned long i; @@ -101,11 +101,11 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, max_guarantee = group->min_rate; } } else { - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled || !evport->qos.enabled || - evport->qos.group != group || evport->qos.min_rate < max_guarantee) + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.min_rate < max_guarantee) continue; - max_guarantee = evport->qos.min_rate; + max_guarantee = vport->qos.min_rate; } } @@ -134,24 +134,24 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); - struct mlx5_vport *evport; + struct mlx5_vport *vport; unsigned long i; u32 bw_share; int err; - mlx5_esw_for_each_vport(esw, i, evport) { - if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || vport->qos.group != group) continue; - bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + bw_share = esw_qos_calc_bw_share(vport->qos.min_rate, divider, fw_max_bw_share); - if (bw_share == evport->qos.bw_share) + if (bw_share == vport->qos.bw_share) continue; - err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + err = esw_qos_vport_config(esw, vport, vport->qos.max_rate, bw_share, extack); if (err) return err; - evport->qos.bw_share = bw_share; + vport->qos.bw_share = bw_share; } return 0; @@ -189,7 +189,7 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid return 0; } -static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, +static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 min_rate, struct netlink_ext_ack *extack) { u32 fw_max_bw_share, previous_min_rate; @@ -202,19 +202,19 @@ static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vpor fw_max_bw_share >= MLX5_MIN_BW_SHARE; if (min_rate && !min_rate_supported) return -EOPNOTSUPP; - if (min_rate == evport->qos.min_rate) + if (min_rate == vport->qos.min_rate) return 0; - previous_min_rate = evport->qos.min_rate; - evport->qos.min_rate = min_rate; - err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + previous_min_rate = vport->qos.min_rate; + vport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, vport->qos.group, extack); if (err) - evport->qos.min_rate = previous_min_rate; + vport->qos.min_rate = previous_min_rate; return err; } -static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, 
struct mlx5_vport *evport, +static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 max_rate, struct netlink_ext_ack *extack) { u32 act_max_rate = max_rate; @@ -226,19 +226,19 @@ static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vpor if (max_rate && !max_rate_supported) return -EOPNOTSUPP; - if (max_rate == evport->qos.max_rate) + if (max_rate == vport->qos.max_rate) return 0; /* If parent group has rate limit need to set to group * value when new max rate is 0. */ - if (evport->qos.group && !max_rate) - act_max_rate = evport->qos.group->max_rate; + if (vport->qos.group && !max_rate) + act_max_rate = vport->qos.group->max_rate; - err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + err = esw_qos_vport_config(esw, vport, act_max_rate, vport->qos.bw_share, extack); if (!err) - evport->qos.max_rate = max_rate; + vport->qos.max_rate = max_rate; return err; } From 8746eeb7f80803009e2e137b8a6667820ea41c18 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:12 +0300 Subject: [PATCH 04/14] net/mlx5: qos: Refactor and document bw_share calculation The previous function (esw_qos_calculate_min_rate_divider) had two completely different modes of execution, depending on the 'group_level' parameter. Split it into two separate functions: - esw_qos_calculate_min_rate_divider - computes the divider across all groups. - esw_qos_calculate_group_min_rate_divider - computes the divider for the vports in a group. Fold the divider calculation into the corresponding normalize functions, so callers no longer need to compute the divider themselves. Also rename the normalize functions to better indicate what level they're operating on. Finally, document everything so that this topic can more easily be understood by future maintainers. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 134 +++++++++--------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- 2 files changed, 71 insertions(+), 66 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8be4980fcc616..a8231a498ed6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -11,13 +11,13 @@ /* Minimum supported BW share value by the HW is 1 Mbit/sec */ #define MLX5_MIN_BW_SHARE 1 -#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ - min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) struct mlx5_esw_rate_group { u32 tsar_ix; + /* Bandwidth parameters. */ u32 max_rate; u32 min_rate; + /* A computed value indicating relative min_rate between group members.
*/ u32 bw_share; struct list_head list; }; @@ -83,57 +83,77 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw, return 0; } -static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, - bool group_level) +static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *vport; u32 max_guarantee = 0; unsigned long i; - if (group_level) { - struct mlx5_esw_rate_group *group; - list_for_each_entry(group, &esw->qos.groups, list) { - if (group->min_rate < max_guarantee) - continue; - max_guarantee = group->min_rate; - } - } else { - mlx5_esw_for_each_vport(esw, i, vport) { - if (!vport->enabled || !vport->qos.enabled || - vport->qos.group != group || vport->qos.min_rate < max_guarantee) - continue; - max_guarantee = vport->qos.min_rate; - } + /* Find max min_rate across all vports in this group. + * This will correspond to fw_max_bw_share in the final bw_share calculation. + */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.min_rate < max_guarantee) + continue; + max_guarantee = vport->qos.min_rate; } if (max_guarantee) return max_t(u32, max_guarantee / fw_max_bw_share, 1); - /* If vports min rate divider is 0 but their group has bw_share configured, then - * need to set bw_share for vports to minimal value. + /* If vports max min_rate divider is 0 but their group has bw_share + * configured, then set bw_share for vports to minimal value. */ - if (!group_level && !max_guarantee && group && group->bw_share) + if (group && group->bw_share) return 1; + + /* A divider of 0 sets bw_share for all group vports to 0, + * effectively disabling min guarantees. + */ return 0; } -static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw) { - if (divider) - return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 max_guarantee = 0; + + /* Find max min_rate across all esw groups. + * This will correspond to fw_max_bw_share in the final bw_share calculation. + */ + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If no group has min_rate configured, a divider of 0 sets all + * groups' bw_share to 0, effectively disabling min guarantees. 
+ */ return 0; } -static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (!divider) + return 0; + return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max); +} + +static int esw_qos_normalize_group_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + u32 divider = esw_qos_calculate_group_min_rate_divider(esw, group); struct mlx5_vport *vport; unsigned long i; u32 bw_share; @@ -157,10 +177,10 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, return 0; } -static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, - struct netlink_ext_ack *extack) +static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw); struct mlx5_esw_rate_group *group; u32 bw_share; int err; @@ -180,7 +200,7 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid /* All the group's vports need to be set with default bw_share * to enable them with QOS */ - err = esw_qos_normalize_vports_min_rate(esw, group, extack); + err = esw_qos_normalize_group_min_rate(esw, group, extack); if (err) return err; @@ -207,7 +227,7 @@ static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vpor previous_min_rate = vport->qos.min_rate; vport->qos.min_rate = min_rate; - err = esw_qos_normalize_vports_min_rate(esw, vport->qos.group, extack); + err = esw_qos_normalize_group_min_rate(esw, vport->qos.group, extack); if (err) vport->qos.min_rate = previous_min_rate; @@ -229,9 +249,7 @@ static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vpor if (max_rate == vport->qos.max_rate) return 0; - /* If parent group has rate limit need to set to group - * value when new max rate is 0. - */ + /* Use parent group limit if new max rate is 0. 
*/ if (vport->qos.group && !max_rate) act_max_rate = vport->qos.group->max_rate; @@ -248,10 +266,10 @@ static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_ { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_core_dev *dev = esw->dev; - u32 previous_min_rate, divider; + u32 previous_min_rate; int err; - if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + if (!MLX5_CAP_QOS(dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE) return -EOPNOTSUPP; if (min_rate == group->min_rate) @@ -259,15 +277,13 @@ static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_ previous_min_rate = group->min_rate; group->min_rate = min_rate; - divider = esw_qos_calculate_min_rate_divider(esw, group, true); - err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + err = esw_qos_normalize_min_rate(esw, extack); if (err) { - group->min_rate = previous_min_rate; NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); /* Attempt restoring previous configuration */ - divider = esw_qos_calculate_min_rate_divider(esw, group, true); - if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + group->min_rate = previous_min_rate; + if (esw_qos_normalize_min_rate(esw, extack)) NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); } @@ -291,9 +307,7 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, group->max_rate = max_rate; - /* Any unlimited vports in the group should be set - * with the value of the group. - */ + /* Any unlimited vports in the group should be set with the value of the group. */ mlx5_esw_for_each_vport(esw, i, vport) { if (!vport->enabled || !vport->qos.enabled || vport->qos.group != group || vport->qos.max_rate) @@ -382,12 +396,8 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, } vport->qos.group = new_group; + /* Use new group max rate if vport max rate is unlimited. */ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; - - /* If vport is unlimited, we set the group's value. - * Therefore, if the group is limited it will apply to - * the vport as well and if not, vport will remain unlimited. 
- */ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); @@ -428,8 +438,8 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, /* Recalculate bw share weights of old and new groups */ if (vport->qos.bw_share || new_group->bw_share) { - esw_qos_normalize_vports_min_rate(esw, curr_group, extack); - esw_qos_normalize_vports_min_rate(esw, new_group, extack); + esw_qos_normalize_group_min_rate(esw, curr_group, extack); + esw_qos_normalize_group_min_rate(esw, new_group, extack); } return 0; @@ -440,7 +450,6 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; - u32 divider; void *attr; int err; @@ -465,13 +474,10 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex list_add_tail(&group->list, &esw->qos.groups); - divider = esw_qos_calculate_min_rate_divider(esw, group, true); - if (divider) { - err = esw_qos_normalize_groups_min_rate(esw, divider, extack); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); - goto err_min_rate; - } + err = esw_qos_normalize_min_rate(esw, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; } trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); @@ -515,15 +521,13 @@ static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { - u32 divider; int err; list_del(&group->list); - divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); - err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + err = esw_qos_normalize_min_rate(esw, extack); if (err) - NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9bf05ae58af0e..ce857eae6898d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -215,9 +215,10 @@ struct mlx5_vport { struct { bool enabled; u32 esw_sched_elem_ix; - u32 bw_share; u32 min_rate; u32 max_rate; + /* A computed value indicating relative min_rate between vports in a group. */ + u32 bw_share; struct mlx5_esw_rate_group *group; } qos; From d3a3b0765e18d78117cbf7b4cd61cd4a6ab2b5e5 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:13 +0300 Subject: [PATCH 05/14] net/mlx5: qos: Maintain rate group vport members in a list Previously, finding group members was done by iterating over all vports of an eswitch and comparing their group with the required one, but that approach will break down when a group can contain vports from multiple eswitches. Solve that by maintaining a list of vport members. Instead of iterating over esw vports, loop over the members list. Use this opportunity to provide two new functions to allocate and free a group, so that the number of state transitions is smaller. This will also be used in a future patch. 
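The resulting membership idiom is captured by the esw_qos_vport_set_group() helper added below, reproduced here for reference:

	static void esw_qos_vport_set_group(struct mlx5_vport *vport,
					    struct mlx5_esw_rate_group *group)
	{
		list_del_init(&vport->qos.group_entry);
		vport->qos.group = group;
		list_add_tail(&vport->qos.group_entry, &group->members);
	}

Because list_del_init() leaves the entry pointing at itself, the same helper is safe both for the initial assignment (right after INIT_LIST_HEAD() on the entry) and for later moves between groups.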
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 94 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index a8231a498ed6f..cfff1413dcfc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -20,8 +20,17 @@ struct mlx5_esw_rate_group { /* A computed value indicating relative min_rate between group members. */ u32 bw_share; struct list_head list; + /* Vport members of this group.*/ + struct list_head members; }; +static void esw_qos_vport_set_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group) +{ + list_del_init(&vport->qos.group_entry); + vport->qos.group = group; + list_add_tail(&vport->qos.group_entry, &group->members); +} + static int esw_qos_sched_elem_config(struct mlx5_core_dev *dev, u32 sched_elem_ix, u32 max_rate, u32 bw_share) { @@ -89,17 +98,13 @@ static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_eswitch *esw, u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *vport; u32 max_guarantee = 0; - unsigned long i; - /* Find max min_rate across all vports in this group. * This will correspond to fw_max_bw_share in the final bw_share calculation. */ - mlx5_esw_for_each_vport(esw, i, vport) { - if (!vport->enabled || !vport->qos.enabled || - vport->qos.group != group || vport->qos.min_rate < max_guarantee) - continue; - max_guarantee = vport->qos.min_rate; + list_for_each_entry(vport, &group->members, qos.group_entry) { + if (vport->qos.min_rate > max_guarantee) + max_guarantee = vport->qos.min_rate; } if (max_guarantee) @@ -155,13 +160,10 @@ static int esw_qos_normalize_group_min_rate(struct mlx5_eswitch *esw, u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); u32 divider = esw_qos_calculate_group_min_rate_divider(esw, group); struct mlx5_vport *vport; - unsigned long i; u32 bw_share; int err; - mlx5_esw_for_each_vport(esw, i, vport) { - if (!vport->enabled || !vport->qos.enabled || vport->qos.group != group) - continue; + list_for_each_entry(vport, &group->members, qos.group_entry) { bw_share = esw_qos_calc_bw_share(vport->qos.min_rate, divider, fw_max_bw_share); if (bw_share == vport->qos.bw_share) @@ -295,7 +297,6 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, u32 max_rate, struct netlink_ext_ack *extack) { struct mlx5_vport *vport; - unsigned long i; int err; if (group->max_rate == max_rate) @@ -308,9 +309,8 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, group->max_rate = max_rate; /* Any unlimited vports in the group should be set with the value of the group. */ - mlx5_esw_for_each_vport(esw, i, vport) { - if (!vport->enabled || !vport->qos.enabled || - vport->qos.group != group || vport->qos.max_rate) + list_for_each_entry(vport, &group->members, qos.group_entry) { + if (vport->qos.max_rate) continue; err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); @@ -395,7 +395,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, return err; } - vport->qos.group = new_group; + esw_qos_vport_set_group(vport, new_group); /* Use new group max rate if vport max rate is unlimited. */ max_rate = vport->qos.max_rate ? 
vport->qos.max_rate : new_group->max_rate; err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); @@ -407,7 +407,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, return 0; err_sched: - vport->qos.group = curr_group; + esw_qos_vport_set_group(vport, curr_group); max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", @@ -446,16 +446,33 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, } static struct mlx5_esw_rate_group * -__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +__esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix) { - u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; - void *attr; - int err; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) - return ERR_PTR(-ENOMEM); + return NULL; + + group->tsar_ix = tsar_ix; + INIT_LIST_HEAD(&group->members); + list_add_tail(&group->list, &esw->qos.groups); + return group; +} + +static void __esw_qos_free_rate_group(struct mlx5_esw_rate_group *group) +{ + list_del(&group->list); + kfree(group); +} + +static struct mlx5_esw_rate_group * +__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + int tsar_ix, err; + void *attr; MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); @@ -466,13 +483,18 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex err = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, - &group->tsar_ix); + &tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); - goto err_sched_elem; + return ERR_PTR(err); } - list_add_tail(&group->list, &esw->qos.groups); + group = __esw_qos_alloc_rate_group(esw, tsar_ix); + if (!group) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc group failed"); + err = -ENOMEM; + goto err_alloc_group; + } err = esw_qos_normalize_min_rate(esw, extack); if (err) { @@ -484,13 +506,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex return group; err_min_rate: - list_del(&group->list); + __esw_qos_free_rate_group(group); +err_alloc_group: if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - group->tsar_ix)) + tsar_ix)) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); -err_sched_elem: - kfree(group); return ERR_PTR(err); } @@ -523,21 +544,19 @@ static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, { int err; - list_del(&group->list); - - err = esw_qos_normalize_min_rate(esw, extack); - if (err) - NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, group->tsar_ix); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + __esw_qos_free_rate_group(group); - trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + err = esw_qos_normalize_min_rate(esw, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); - kfree(group); return err; } @@ -655,7 +674,8 @@ static int 
esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo if (err) return err; - vport->qos.group = esw->qos.group0; + INIT_LIST_HEAD(&vport->qos.group_entry); + esw_qos_vport_set_group(vport, esw->qos.group0); err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ce857eae6898d..f208ae16bfd2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -220,6 +220,7 @@ struct mlx5_vport { /* A computed value indicating relative min_rate between vports in a group. */ u32 bw_share; struct mlx5_esw_rate_group *group; + struct list_head group_entry; } qos; u16 vport; From a87a561b802a45d37bc34e5a8e4f57a213ea713f Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:14 +0300 Subject: [PATCH 06/14] net/mlx5: qos: Always create group0 All vports not explicitly members of a group with QoS enabled are part of the internal esw group0, except when the hw reports that groups aren't supported (log_esw_max_sched_depth == 0). This creates corner cases in the code, which has to make sure that this case is supported. Additionally, the groups are about to be moved out of eswitches, and group0 being NULL creates additional complications there. This patch makes sure to always create group0, even if max sched depth is 0. In that case, a software-only group0 is created referencing the root TSAR. Vports can point to this group when their QoS is enabled and they'll be attached to the root TSAR directly. This eliminates corner cases in the code by offering the guarantee that if qos is enabled, vport->qos.group is non-NULL. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 36 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++++--- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index cfff1413dcfc9..958b8894f5c09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -113,7 +113,7 @@ static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_eswitch *esw, /* If vports max min_rate divider is 0 but their group has bw_share * configured, then set bw_share for vports to minimal value. */ - if (group && group->bw_share) + if (group->bw_share) return 1; /* A divider of 0 sets bw_share for all group vports to 0, @@ -132,7 +132,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw) * This will correspond to fw_max_bw_share in the final bw_share calculation. */ list_for_each_entry(group, &esw->qos.groups, list) { - if (group->min_rate < max_guarantee) + if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix) continue; max_guarantee = group->min_rate; } @@ -188,6 +188,8 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e int err; list_for_each_entry(group, &esw->qos.groups, list) { + if (group->tsar_ix == esw->qos.root_tsar_ix) + continue; bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); if (bw_share == group->bw_share) @@ -252,7 +254,7 @@ static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vpor return 0; /* Use parent group limit if new max rate is 0. 
*/ - if (vport->qos.group && !max_rate) + if (!max_rate) act_max_rate = vport->qos.group->max_rate; err = esw_qos_vport_config(esw, vport, act_max_rate, vport->qos.bw_share, extack); @@ -348,19 +350,17 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group = vport->qos.group; struct mlx5_core_dev *dev = esw->dev; - u32 parent_tsar_ix; void *attr; int err; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT)) return -EOPNOTSUPP; - parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, group->tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); @@ -605,12 +605,17 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta INIT_LIST_HEAD(&esw->qos.groups); if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); - if (IS_ERR(esw->qos.group0)) { - esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", - PTR_ERR(esw->qos.group0)); - err = PTR_ERR(esw->qos.group0); - goto err_group0; - } + } else { + /* The eswitch doesn't support scheduling groups. + * Create a software-only group0 using the root TSAR to attach vport QoS to. + */ + if (!__esw_qos_alloc_rate_group(esw, esw->qos.root_tsar_ix)) + esw->qos.group0 = ERR_PTR(-ENOMEM); + } + if (IS_ERR(esw->qos.group0)) { + err = PTR_ERR(esw->qos.group0); + esw_warn(dev, "E-Switch create rate group 0 failed (%d)\n", err); + goto err_group0; } refcount_set(&esw->qos.refcnt, 1); @@ -628,8 +633,11 @@ static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; - if (esw->qos.group0) + if (esw->qos.group0->tsar_ix != esw->qos.root_tsar_ix) __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + else + __esw_qos_free_rate_group(esw->qos.group0); + esw->qos.group0 = NULL; err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -699,7 +707,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo lockdep_assert_held(&esw->state_lock); if (!vport->qos.enabled) return; - WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + WARN(vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); err = mlx5_destroy_scheduling_element_cmd(esw->dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f208ae16bfd2c..fec9e843f6731 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -213,6 +213,7 @@ struct mlx5_vport { struct mlx5_vport_info info; struct { + /* Initially false, set to true whenever any QoS features are used. */ bool enabled; u32 esw_sched_elem_ix; u32 min_rate; @@ -362,14 +363,17 @@ struct mlx5_eswitch { atomic64_t user_count; struct { - u32 root_tsar_ix; - struct mlx5_esw_rate_group *group0; - struct list_head groups; /* Protected by esw->state_lock */ - /* Protected by esw->state_lock. * Initially 0, meaning no QoS users and QoS is disabled. 
*/ refcount_t refcnt; + u32 root_tsar_ix; + /* Contains all vports with QoS enabled but no explicit group. + * Cannot be NULL if QoS is enabled, but may be a fake group + * referencing the root TSAR if the esw doesn't support groups. + */ + struct mlx5_esw_rate_group *group0; + struct list_head groups; /* Protected by esw->state_lock */ } qos; struct mlx5_esw_bridge_offloads *br_offloads; From e9fa32f110867655eb396cef1f35b66278e53051 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:15 +0300 Subject: [PATCH 07/14] net/mlx5: qos: Drop 'esw' param from vport qos functions The vport has a pointer to its own eswitch in vport->dev->priv.eswitch, so passing the same eswitch as a parameter to the various functions manipulating vport qos is superfluous at best and prone to errors at worst. More importantly, with the upcoming cross-esw scheduling changes, the eswitch that should receive the various scheduling element commands is NOT the same as the vport's eswitch, so the current code's assumptions will break. To avoid confusion and bugs, this commit drops the 'esw' parameter from all vport qos functions and uses the vport's own eswitch pointer instead. Signed-off-by: Cosmin Ratiu Reviewed-by: Carolina Jubran Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../mellanox/mlx5/core/esw/devlink_port.c | 4 +- .../ethernet/mellanox/mlx5/core/esw/legacy.c | 2 +- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 95 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/esw/qos.h | 5 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +- .../mellanox/mlx5/core/eswitch_offloads.c | 4 +- 7 files changed, 57 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index f8869c9b68029..86af1891395f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -187,7 +187,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx return err; } -void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport) { struct mlx5_devlink_port *dl_port; @@ -195,7 +195,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct return; dl_port = vport->dl_port; - mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + mlx5_esw_qos_vport_update_group(vport, NULL, NULL); devl_rate_leaf_destroy(&dl_port->dl_port); devl_port_unregister(&dl_port->dl_port); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 8587cd572da53..3c8388706e152 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -521,7 +521,7 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, return PTR_ERR(evport); mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate); + err = mlx5_esw_qos_set_vport_rate(evport, max_rate, min_rate); mutex_unlock(&esw->state_lock); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 958b8894f5c09..baf68ffb07ccb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -67,20 +67,19 @@ static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_g return err; } -static int esw_qos_vport_config(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +static int esw_qos_vport_config(struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = esw->dev; int err; if (!vport->qos.enabled) return -EIO; - err = esw_qos_sched_elem_config(dev, vport->qos.esw_sched_elem_ix, max_rate, bw_share); + err = esw_qos_sched_elem_config(vport->dev, vport->qos.esw_sched_elem_ix, max_rate, + bw_share); if (err) { - esw_warn(esw->dev, + esw_warn(vport->dev, "E-Switch modify vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); NL_SET_ERR_MSG_MOD(extack, "E-Switch modify vport scheduling element failed"); @@ -169,7 +168,7 @@ static int esw_qos_normalize_group_min_rate(struct mlx5_eswitch *esw, if (bw_share == vport->qos.bw_share) continue; - err = esw_qos_vport_config(esw, vport, vport->qos.max_rate, bw_share, extack); + err = esw_qos_vport_config(vport, vport->qos.max_rate, bw_share, extack); if (err) return err; @@ -213,16 +212,17 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e return 0; } -static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, +static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; u32 fw_max_bw_share, previous_min_rate; bool min_rate_supported; int err; lockdep_assert_held(&esw->state_lock); - fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share = MLX5_CAP_QOS(vport->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(vport->dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE; if (min_rate && !min_rate_supported) return -EOPNOTSUPP; @@ -238,15 +238,16 @@ static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vpor return err; } -static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, +static int esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; u32 act_max_rate = max_rate; bool max_rate_supported; int err; lockdep_assert_held(&esw->state_lock); - max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + max_rate_supported = MLX5_CAP_QOS(vport->dev, esw_rate_limit); if (max_rate && !max_rate_supported) return -EOPNOTSUPP; @@ -257,7 +258,7 @@ static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vpor if (!max_rate) act_max_rate = vport->qos.group->max_rate; - err = esw_qos_vport_config(esw, vport, act_max_rate, vport->qos.bw_share, extack); + err = esw_qos_vport_config(vport, act_max_rate, vport->qos.bw_share, extack); if (!err) vport->qos.max_rate = max_rate; @@ -315,7 +316,7 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, if (vport->qos.max_rate) continue; - err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + err = esw_qos_vport_config(vport, max_rate, vport->qos.bw_share, extack); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch vport implicit rate limit setting failed"); @@ -343,13 +344,12 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) return false; } -static int 
esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, u32 max_rate, u32 bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group = vport->qos.group; - struct mlx5_core_dev *dev = esw->dev; + struct mlx5_core_dev *dev = vport->dev; void *attr; int err; @@ -369,7 +369,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, sched_ctx, &vport->qos.esw_sched_elem_ix); if (err) { - esw_warn(vport->dev, + esw_warn(dev, "E-Switch create vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); return err; @@ -378,8 +378,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, return 0; } -static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +static int esw_qos_update_group_scheduling_element(struct mlx5_vport *vport, struct mlx5_esw_rate_group *curr_group, struct mlx5_esw_rate_group *new_group, struct netlink_ext_ack *extack) @@ -387,7 +386,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, u32 max_rate; int err; - err = mlx5_destroy_scheduling_element_cmd(esw->dev, + err = mlx5_destroy_scheduling_element_cmd(vport->dev, SCHEDULING_HIERARCHY_E_SWITCH, vport->qos.esw_sched_elem_ix); if (err) { @@ -398,7 +397,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, esw_qos_vport_set_group(vport, new_group); /* Use new group max rate if vport max rate is unlimited. */ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; - err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + err = esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); goto err_sched; @@ -409,18 +408,18 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, err_sched: esw_qos_vport_set_group(vport, curr_group); max_rate = vport->qos.max_rate ? 
vport->qos.max_rate : curr_group->max_rate; - if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) - esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + if (esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share)) + esw_warn(vport->dev, "E-Switch vport group restore failed (vport=%d)\n", vport->vport); return err; } -static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +static int esw_qos_vport_update_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_rate_group *new_group, *curr_group; int err; @@ -432,7 +431,7 @@ static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, if (curr_group == new_group) return 0; - err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + err = esw_qos_update_group_scheduling_element(vport, curr_group, new_group, extack); if (err) return err; @@ -669,9 +668,10 @@ static void esw_qos_put(struct mlx5_eswitch *esw) esw_qos_destroy(esw); } -static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, +static int esw_qos_vport_enable(struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err; lockdep_assert_held(&esw->state_lock); @@ -685,7 +685,7 @@ static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo INIT_LIST_HEAD(&vport->qos.group_entry); esw_qos_vport_set_group(vport, esw->qos.group0); - err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + err = esw_qos_vport_create_sched_element(vport, max_rate, bw_share); if (err) goto err_out; @@ -700,8 +700,9 @@ static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo return err; } -void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err; lockdep_assert_held(&esw->state_lock); @@ -723,20 +724,19 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo esw_qos_put(esw); } -int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, - u32 max_rate, u32 min_rate) +int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err; lockdep_assert_held(&esw->state_lock); - err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); + err = esw_qos_vport_enable(vport, 0, 0, NULL); if (err) return err; - err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); + err = esw_qos_set_vport_min_rate(vport, min_rate, NULL); if (!err) - err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); - + err = esw_qos_set_vport_max_rate(vport, max_rate, NULL); return err; } @@ -830,12 +830,12 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 mutex_lock(&esw->state_lock); if (!vport->qos.enabled) { /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. 
*/ - err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); + err = esw_qos_vport_enable(vport, rate_mbps, vport->qos.bw_share, NULL); } else { MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; - err = mlx5_modify_scheduling_element_cmd(esw->dev, + err = mlx5_modify_scheduling_element_cmd(vport->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, vport->qos.esw_sched_elem_ix, @@ -897,11 +897,11 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void return err; mutex_lock(&esw->state_lock); - err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + err = esw_qos_vport_enable(vport, 0, 0, extack); if (err) goto unlock; - err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + err = esw_qos_set_vport_min_rate(vport, tx_share, extack); unlock: mutex_unlock(&esw->state_lock); return err; @@ -923,11 +923,11 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * return err; mutex_lock(&esw->state_lock); - err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + err = esw_qos_vport_enable(vport, 0, 0, extack); if (err) goto unlock; - err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + err = esw_qos_set_vport_max_rate(vport, tx_max, extack); unlock: mutex_unlock(&esw->state_lock); return err; @@ -1017,20 +1017,20 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, return err; } -int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err = 0; mutex_lock(&esw->state_lock); if (!vport->qos.enabled && !group) goto unlock; - err = esw_qos_vport_enable(esw, vport, 0, 0, extack); + err = esw_qos_vport_enable(vport, 0, 0, extack); if (!err) - err = esw_qos_vport_update_group(esw, vport, group, extack); + err = esw_qos_vport_update_group(vport, group, extack); unlock: mutex_unlock(&esw->state_lock); return err; @@ -1045,9 +1045,8 @@ int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, struct mlx5_vport *vport = priv; if (!parent) - return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, - vport, NULL, extack); + return mlx5_esw_qos_vport_update_group(vport, NULL, extack); group = parent_priv; - return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); + return mlx5_esw_qos_vport_update_group(vport, group, extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 0141e9d52037f..c4f04c3e6a594 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -6,9 +6,8 @@ #ifdef CONFIG_MLX5_ESWITCH -int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, - u32 max_rate, u32 min_rate); -void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate); +void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport); int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_share, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 
17f78091ad30e..4a187f39dabae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -894,7 +894,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport vport_num, 1, MLX5_VPORT_ADMIN_STATE_DOWN); - mlx5_esw_qos_vport_disable(esw, vport); + mlx5_esw_qos_vport_disable(vport); esw_vport_cleanup_acl(esw, vport); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fec9e843f6731..567276900a37f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -433,8 +433,7 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, u16 vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); -int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); @@ -812,7 +811,7 @@ int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5 void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport); struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f24f91d213f24..fd34f43d18d58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2617,7 +2617,7 @@ int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vpor return err; load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport); + mlx5_esw_offloads_devlink_port_unregister(vport); return err; } @@ -2628,7 +2628,7 @@ void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *v mlx5_esw_offloads_rep_unload(esw, vport->vport); - mlx5_esw_offloads_devlink_port_unregister(esw, vport); + mlx5_esw_offloads_devlink_port_unregister(vport); } static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, From b9cfe193eb8fa3468fbd5585f92ed77648488b98 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:16 +0300 Subject: [PATCH 08/14] net/mlx5: qos: Store the eswitch in a mlx5_esw_rate_group The rate groups are about to be moved out of eswitches, so store a reference to the eswitch they belong to so things can still work later. This allows dropping the esw parameter from a couple of functions and simplifying some of the code. Use this opportunity to make sure that vport scheduling element commands are always sent to the group eswitch, because that will be relevant for cross-esw scheduling. For now though, the eswitches are not different. There is no functionality change here. 
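To make the new ownership rule concrete, here is a minimal sketch (not part of the patch; esw_qos_group_dev() is a hypothetical helper) of how the stored back-pointer lets code reach the right device without an extra esw parameter:

/* Hypothetical helper: the group's own eswitch is authoritative for
 * firmware commands, even once a vport may be scheduled on a group
 * belonging to a different eswitch.
 */
static struct mlx5_core_dev *
esw_qos_group_dev(struct mlx5_esw_rate_group *group)
{
	return group->esw->dev;
}

Vport scheduling element commands are then issued against vport->qos.group->esw->dev, which is exactly what the hunks below switch to.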
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 115 ++++++++---------- 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index baf68ffb07ccb..3de3460ec8cd3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -20,6 +20,8 @@ struct mlx5_esw_rate_group { /* A computed value indicating relative min_rate between group members. */ u32 bw_share; struct list_head list; + /* The eswitch this group belongs to. */ + struct mlx5_eswitch *esw; /* Vport members of this group.*/ struct list_head members; }; @@ -52,10 +54,10 @@ static int esw_qos_sched_elem_config(struct mlx5_core_dev *dev, u32 sched_elem_i bitmask); } -static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, +static int esw_qos_group_config(struct mlx5_esw_rate_group *group, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = esw->dev; + struct mlx5_core_dev *dev = group->esw->dev; int err; err = esw_qos_sched_elem_config(dev, group->tsar_ix, max_rate, bw_share); @@ -71,15 +73,12 @@ static int esw_qos_vport_config(struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { + struct mlx5_core_dev *dev = vport->qos.group->esw->dev; int err; - if (!vport->qos.enabled) - return -EIO; - - err = esw_qos_sched_elem_config(vport->dev, vport->qos.esw_sched_elem_ix, max_rate, - bw_share); + err = esw_qos_sched_elem_config(dev, vport->qos.esw_sched_elem_ix, max_rate, bw_share); if (err) { - esw_warn(vport->dev, + esw_warn(dev, "E-Switch modify vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); NL_SET_ERR_MSG_MOD(extack, "E-Switch modify vport scheduling element failed"); @@ -91,10 +90,9 @@ static int esw_qos_vport_config(struct mlx5_vport *vport, return 0; } -static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group) +static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_esw_rate_group *group) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share); struct mlx5_vport *vport; u32 max_guarantee = 0; @@ -152,12 +150,11 @@ static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max); } -static int esw_qos_normalize_group_min_rate(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, +static int esw_qos_normalize_group_min_rate(struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - u32 divider = esw_qos_calculate_group_min_rate_divider(esw, group); + u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_group_min_rate_divider(group); struct mlx5_vport *vport; u32 bw_share; int err; @@ -194,7 +191,7 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e if (bw_share == group->bw_share) continue; - err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + err = esw_qos_group_config(group, group->max_rate, bw_share, extack); if (err) return err; @@ -203,7 +200,7 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, 
struct netlink_e /* All the group's vports need to be set with default bw_share * to enable them with QOS */ - err = esw_qos_normalize_group_min_rate(esw, group, extack); + err = esw_qos_normalize_group_min_rate(group, extack); if (err) return err; @@ -231,7 +228,7 @@ static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport, previous_min_rate = vport->qos.min_rate; vport->qos.min_rate = min_rate; - err = esw_qos_normalize_group_min_rate(esw, vport->qos.group, extack); + err = esw_qos_normalize_group_min_rate(vport->qos.group, extack); if (err) vport->qos.min_rate = previous_min_rate; @@ -266,15 +263,15 @@ static int esw_qos_set_vport_max_rate(struct mlx5_vport *vport, return err; } -static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, +static int esw_qos_set_group_min_rate(struct mlx5_esw_rate_group *group, u32 min_rate, struct netlink_ext_ack *extack) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - struct mlx5_core_dev *dev = esw->dev; + struct mlx5_eswitch *esw = group->esw; u32 previous_min_rate; int err; - if (!MLX5_CAP_QOS(dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE) + if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || + MLX5_CAP_QOS(esw->dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE) return -EOPNOTSUPP; if (min_rate == group->min_rate) @@ -295,8 +292,7 @@ static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_ return err; } -static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, +static int esw_qos_set_group_max_rate(struct mlx5_esw_rate_group *group, u32 max_rate, struct netlink_ext_ack *extack) { struct mlx5_vport *vport; @@ -305,7 +301,7 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, if (group->max_rate == max_rate) return 0; - err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + err = esw_qos_group_config(group, max_rate, group->bw_share, extack); if (err) return err; @@ -349,7 +345,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group = vport->qos.group; - struct mlx5_core_dev *dev = vport->dev; + struct mlx5_core_dev *dev = group->esw->dev; void *attr; int err; @@ -386,7 +382,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_vport *vport, u32 max_rate; int err; - err = mlx5_destroy_scheduling_element_cmd(vport->dev, + err = mlx5_destroy_scheduling_element_cmd(curr_group->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, vport->qos.esw_sched_elem_ix); if (err) { @@ -409,7 +405,7 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_vport *vport, esw_qos_vport_set_group(vport, curr_group); max_rate = vport->qos.max_rate ? 
vport->qos.max_rate : curr_group->max_rate; if (esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share)) - esw_warn(vport->dev, "E-Switch vport group restore failed (vport=%d)\n", + esw_warn(curr_group->esw->dev, "E-Switch vport group restore failed (vport=%d)\n", vport->vport); return err; @@ -437,8 +433,8 @@ static int esw_qos_vport_update_group(struct mlx5_vport *vport, /* Recalculate bw share weights of old and new groups */ if (vport->qos.bw_share || new_group->bw_share) { - esw_qos_normalize_group_min_rate(esw, curr_group, extack); - esw_qos_normalize_group_min_rate(esw, new_group, extack); + esw_qos_normalize_group_min_rate(curr_group, extack); + esw_qos_normalize_group_min_rate(new_group, extack); } return 0; @@ -453,6 +449,7 @@ __esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix) if (!group) return NULL; + group->esw = esw; group->tsar_ix = tsar_ix; INIT_LIST_HEAD(&group->members); list_add_tail(&group->list, &esw->qos.groups); @@ -537,10 +534,10 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta return group; } -static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, +static int __esw_qos_destroy_rate_group(struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = group->esw; int err; trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); @@ -560,18 +557,6 @@ static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, return err; } -static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) -{ - int err; - - err = __esw_qos_destroy_rate_group(esw, group, extack); - esw_qos_put(esw); - - return err; -} - static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; @@ -633,7 +618,7 @@ static void esw_qos_destroy(struct mlx5_eswitch *esw) int err; if (esw->qos.group0->tsar_ix != esw->qos.root_tsar_ix) - __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + __esw_qos_destroy_rate_group(esw->qos.group0, NULL); else __esw_qos_free_rate_group(esw->qos.group0); esw->qos.group0 = NULL; @@ -703,6 +688,7 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + struct mlx5_core_dev *dev; int err; lockdep_assert_held(&esw->state_lock); @@ -711,11 +697,13 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) WARN(vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); - err = mlx5_destroy_scheduling_element_cmd(esw->dev, + dev = vport->qos.group->esw->dev; + err = mlx5_destroy_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, vport->qos.esw_sched_elem_ix); if (err) - esw_warn(esw->dev, "E-Switch destroy vport scheduling element failed (vport=%d,err=%d)\n", + esw_warn(dev, + "E-Switch destroy vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); memset(&vport->qos, 0, sizeof(vport->qos)); @@ -832,10 +820,11 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. 
*/ err = esw_qos_vport_enable(vport, rate_mbps, vport->qos.bw_share, NULL); } else { - MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + struct mlx5_core_dev *dev = vport->qos.group->esw->dev; + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; - err = mlx5_modify_scheduling_element_cmd(vport->dev, + err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, vport->qos.esw_sched_elem_ix, @@ -936,17 +925,16 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); - struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw = group->esw; int err; - err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack); if (err) return err; mutex_lock(&esw->state_lock); - err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + err = esw_qos_set_group_min_rate(group, tx_share, extack); mutex_unlock(&esw->state_lock); return err; } @@ -954,17 +942,16 @@ int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, u64 tx_max, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); - struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw = group->esw; int err; - err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); + err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack); if (err) return err; mutex_lock(&esw->state_lock); - err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); + err = esw_qos_set_group_max_rate(group, tx_max, extack); mutex_unlock(&esw->state_lock); return err; } @@ -1004,15 +991,12 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *group = priv; - struct mlx5_eswitch *esw; + struct mlx5_eswitch *esw = group->esw; int err; - esw = mlx5_devlink_eswitch_get(rate_node->devlink); - if (IS_ERR(esw)) - return PTR_ERR(esw); - mutex_lock(&esw->state_lock); - err = esw_qos_destroy_rate_group(esw, group, extack); + err = __esw_qos_destroy_rate_group(group, extack); + esw_qos_put(esw); mutex_unlock(&esw->state_lock); return err; } @@ -1024,6 +1008,11 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err = 0; + if (group && group->esw != esw) { + NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported"); + return -EOPNOTSUPP; + } + mutex_lock(&esw->state_lock); if (!vport->qos.enabled && !group) goto unlock; From 0c4cf09eca83634e859c51be9dded6b535190a88 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:17 +0300 Subject: [PATCH 09/14] net/mlx5: qos: Add an explicit 'dev' to vport trace calls vport qos trace calls used vport->dev implicitly as the device to which the command was sent (and thus the device logged in traces). But that will no longer be the case for cross-esw scheduling, where the commands have to be sent to the group esw device instead. 
This commit corrects that. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../mlx5/core/esw/diag/qos_tracepoint.h | 23 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 6 ++--- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h index 0ebbd699903d6..645bad0d625f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -11,9 +11,9 @@ #include "eswitch.h" TRACE_EVENT(mlx5_esw_vport_qos_destroy, - TP_PROTO(const struct mlx5_vport *vport), - TP_ARGS(vport), - TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport), + TP_ARGS(dev, vport), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) __field(unsigned short, vport_id) __field(unsigned int, sched_elem_ix) ), @@ -27,9 +27,10 @@ TRACE_EVENT(mlx5_esw_vport_qos_destroy, ); DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, - TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), - TP_ARGS(vport, bw_share, max_rate), - TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport, + u32 bw_share, u32 max_rate), + TP_ARGS(dev, vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) __field(unsigned short, vport_id) __field(unsigned int, sched_elem_ix) __field(unsigned int, bw_share) @@ -50,13 +51,15 @@ DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, ); DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, - TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), - TP_ARGS(vport, bw_share, max_rate) + TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport, + u32 bw_share, u32 max_rate), + TP_ARGS(dev, vport, bw_share, max_rate) ); DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, - TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), - TP_ARGS(vport, bw_share, max_rate) + TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport, + u32 bw_share, u32 max_rate), + TP_ARGS(dev, vport, bw_share, max_rate) ); DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 3de3460ec8cd3..8b24076cbdb55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -85,7 +85,7 @@ static int esw_qos_vport_config(struct mlx5_vport *vport, return err; } - trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + trace_mlx5_esw_vport_qos_config(dev, vport, bw_share, max_rate); return 0; } @@ -675,7 +675,7 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, goto err_out; vport->qos.enabled = true; - trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, bw_share, max_rate); return 0; @@ -707,7 +707,7 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) vport->vport, err); memset(&vport->qos, 0, sizeof(vport->qos)); - trace_mlx5_esw_vport_qos_destroy(vport); + trace_mlx5_esw_vport_qos_destroy(dev, vport); esw_qos_put(esw); } From 43f9011a3d7a51f187116d2cf87de303934619a2 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu 
Date: Tue, 8 Oct 2024 21:32:18 +0300 Subject: [PATCH 10/14] net/mlx5: qos: Rename rate group 'list' as 'parent_entry' 'list' is not very descriptive, I prefer list membership to clearly specify which list the entry belongs to. This commit renames the list entry into the esw groups list as 'parent_entry' to make the code more readable. This is a no-op change. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8b24076cbdb55..5891a68633af6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -19,7 +19,7 @@ struct mlx5_esw_rate_group { u32 min_rate; /* A computed value indicating relative min_rate between group members. */ u32 bw_share; - struct list_head list; + struct list_head parent_entry; /* The eswitch this group belongs to. */ struct mlx5_eswitch *esw; /* Vport members of this group.*/ @@ -128,7 +128,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw) /* Find max min_rate across all esw groups. * This will correspond to fw_max_bw_share in the final bw_share calculation. */ - list_for_each_entry(group, &esw->qos.groups, list) { + list_for_each_entry(group, &esw->qos.groups, parent_entry) { if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix) continue; max_guarantee = group->min_rate; @@ -183,7 +183,7 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e u32 bw_share; int err; - list_for_each_entry(group, &esw->qos.groups, list) { + list_for_each_entry(group, &esw->qos.groups, parent_entry) { if (group->tsar_ix == esw->qos.root_tsar_ix) continue; bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); @@ -452,13 +452,13 @@ __esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix) group->esw = esw; group->tsar_ix = tsar_ix; INIT_LIST_HEAD(&group->members); - list_add_tail(&group->list, &esw->qos.groups); + list_add_tail(&group->parent_entry, &esw->qos.groups); return group; } static void __esw_qos_free_rate_group(struct mlx5_esw_rate_group *group) { - list_del(&group->list); + list_del(&group->parent_entry); kfree(group); } From 107a034d5c1e9cf86fdf4c8801ec8a07e6669520 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:19 +0300 Subject: [PATCH 11/14] net/mlx5: qos: Store rate groups in a qos domain Groups are currently maintained as a list in their corresponding eswitch, protected by the esw state_lock. The upcoming cross-eswitch scheduling feature cannot work with this approach, as it would require acquiring multiple eswitch locks (in the correct order) in order to maintain group membership. This commit moves the rate groups into a new 'qos domain' struct and adds explicit qos init/cleanup steps to the eswitch init/cleanup. Upcoming patches will expand the qos domain struct and allow it to be shared between eswitches. For now, qos domains are private to each esw so there's only an extra indirection. 
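As a sketch of that extra indirection (illustrative only; esw_qos_domain_walk_groups() is a hypothetical function), group traversal now goes through the domain:

/* Hypothetical walk: rate groups hang off the qos domain, no longer
 * off the eswitch itself. With a private domain per esw this is a
 * pure indirection; with a shared domain the list may mix groups
 * from several eswitches.
 */
static void esw_qos_domain_walk_groups(struct mlx5_eswitch *esw)
{
	struct mlx5_esw_rate_group *group;

	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry)
		esw_warn(esw->dev, "group tsar_ix=%u\n", group->tsar_ix);
}

This is also why the min-rate helpers below grow 'group->esw == esw' filters: a shared domain must be able to skip groups owned by other eswitches.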
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 58 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/esw/qos.h | 3 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 5891a68633af6..06b3a21a7475e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -11,6 +11,37 @@ /* Minimum supported BW share value by the HW is 1 Mbit/sec */ #define MLX5_MIN_BW_SHARE 1 +/* Holds rate groups associated with an E-Switch. */ +struct mlx5_qos_domain { + /* List of all mlx5_esw_rate_groups. */ + struct list_head groups; +}; + +static struct mlx5_qos_domain *esw_qos_domain_alloc(void) +{ + struct mlx5_qos_domain *qos_domain; + + qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL); + if (!qos_domain) + return NULL; + + INIT_LIST_HEAD(&qos_domain->groups); + + return qos_domain; +} + +static int esw_qos_domain_init(struct mlx5_eswitch *esw) +{ + esw->qos.domain = esw_qos_domain_alloc(); + + return esw->qos.domain ? 0 : -ENOMEM; +} + +static void esw_qos_domain_release(struct mlx5_eswitch *esw) +{ + kfree(esw->qos.domain); + esw->qos.domain = NULL; +} struct mlx5_esw_rate_group { u32 tsar_ix; @@ -19,6 +50,7 @@ struct mlx5_esw_rate_group { u32 min_rate; /* A computed value indicating relative min_rate between group members. */ u32 bw_share; + /* Membership in the qos domain 'groups' list. */ struct list_head parent_entry; /* The eswitch this group belongs to. */ struct mlx5_eswitch *esw; @@ -128,10 +160,10 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw) /* Find max min_rate across all esw groups. * This will correspond to fw_max_bw_share in the final bw_share calculation. 
*/ - list_for_each_entry(group, &esw->qos.groups, parent_entry) { - if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix) - continue; - max_guarantee = group->min_rate; + list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) { + if (group->esw == esw && group->tsar_ix != esw->qos.root_tsar_ix && + group->min_rate > max_guarantee) + max_guarantee = group->min_rate; } if (max_guarantee) @@ -183,8 +215,8 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e u32 bw_share; int err; - list_for_each_entry(group, &esw->qos.groups, parent_entry) { - if (group->tsar_ix == esw->qos.root_tsar_ix) + list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) { + if (group->esw != esw || group->tsar_ix == esw->qos.root_tsar_ix) continue; bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); @@ -452,7 +484,7 @@ __esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix) group->esw = esw; group->tsar_ix = tsar_ix; INIT_LIST_HEAD(&group->members); - list_add_tail(&group->parent_entry, &esw->qos.groups); + list_add_tail(&group->parent_entry, &esw->qos.domain->groups); return group; } @@ -586,7 +618,6 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta return err; } - INIT_LIST_HEAD(&esw->qos.groups); if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); } else { @@ -868,6 +899,17 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * return 0; } +int mlx5_esw_qos_init(struct mlx5_eswitch *esw) +{ + return esw_qos_domain_init(esw); +} + +void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw) +{ + if (esw->qos.domain) + esw_qos_domain_release(esw); +} + /* Eswitch devlink rate API */ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index c4f04c3e6a594..44fb339c5dcc8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -6,6 +6,9 @@ #ifdef CONFIG_MLX5_ESWITCH +int mlx5_esw_qos_init(struct mlx5_eswitch *esw); +void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw); + int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate); void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4a187f39dabae..9de819c45d335 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1481,6 +1481,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); mlx5_eq_notifier_register(esw->dev, &esw->nb); + err = mlx5_esw_qos_init(esw); + if (err) + goto err_qos_init; + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1489,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) } if (err) - goto abort; + goto err_esw_enable; esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; @@ -1503,7 +1507,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) return 0; -abort: +err_esw_enable: + mlx5_esw_qos_cleanup(esw); +err_qos_init: + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); mlx5_esw_acls_ns_cleanup(esw); return err; } @@ -1631,6 +1638,7 
@@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) if (esw->mode == MLX5_ESWITCH_OFFLOADS) devl_rate_nodes_destroy(devlink); + mlx5_esw_qos_cleanup(esw); } void mlx5_eswitch_disable(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 567276900a37f..e57be2eeec85b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -336,6 +336,7 @@ enum { }; struct dentry; +struct mlx5_qos_domain; struct mlx5_eswitch { struct mlx5_core_dev *dev; @@ -368,12 +369,12 @@ struct mlx5_eswitch { */ refcount_t refcnt; u32 root_tsar_ix; + struct mlx5_qos_domain *domain; /* Contains all vports with QoS enabled but no explicit group. * Cannot be NULL if QoS is enabled, but may be a fake group * referencing the root TSAR if the esw doesn't support groups. */ struct mlx5_esw_rate_group *group0; - struct list_head groups; /* Protected by esw->state_lock */ } qos; struct mlx5_esw_bridge_offloads *br_offloads; From 40efb0b7c755f7803abe59a81c3bdd73edf025d3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 8 Oct 2024 21:32:20 +0300 Subject: [PATCH 12/14] net/mlx5: qos: Refactor locking to a qos domain mutex E-Switch qos changes used the esw state_lock to serialize qos changes. With the introduction of cross-esw scheduling, multiple E-Switches might be involved in a qos operation, so prepare for that by switching locking to use a qos domain mutex. Add three helper functions: - esw_qos_lock - esw_qos_unlock - esw_assert_qos_lock_held Convert existing direct lock/unlock/lockdep calls to them. Also call esw_assert_qos_lock_held in a couple more places. mlx5_esw_qos_set_vport_rate expected to be called with the esw state_lock already held. Change it to instead acquire the qos lock directly. mlx5_eswitch_get_vport_config also accessed qos properties with the esw state lock. Introduce a new function mlx5_esw_qos_get_vport_rate to access those with the correct lock and change get_vport_config to use it. Finally, mlx5_vport_disable is called from the cleanup path with the esw state_lock held, so have it additionally acquire the qos lock to make sure there are no races. 
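The conversion pattern is mechanical; the following hedged sketch distills it (esw_qos_example_set_rates() is hypothetical, modeled on the converted mlx5_esw_qos_set_vport_rate below):

/* Hypothetical example of the new locking discipline: enter qos state
 * through the domain mutex, and let helpers assert it is held.
 */
static int esw_qos_example_set_rates(struct mlx5_vport *vport,
				     u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = esw_qos_vport_enable(vport, 0, 0, NULL);
	if (!err)
		err = esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}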
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../ethernet/mellanox/mlx5/core/esw/legacy.c | 6 +- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 92 +++++++++++++------ .../net/ethernet/mellanox/mlx5/core/esw/qos.h | 1 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 8 +- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 +- 5 files changed, 74 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 3c8388706e152..288c797e4a78a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -513,15 +513,11 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); - int err; if (!mlx5_esw_allowed(esw)) return -EPERM; if (IS_ERR(evport)) return PTR_ERR(evport); - mutex_lock(&esw->state_lock); - err = mlx5_esw_qos_set_vport_rate(evport, max_rate, min_rate); - mutex_unlock(&esw->state_lock); - return err; + return mlx5_esw_qos_set_vport_rate(evport, max_rate, min_rate); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 06b3a21a7475e..be9abeb6e4aa0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -13,10 +13,27 @@ /* Holds rate groups associated with an E-Switch. */ struct mlx5_qos_domain { + /* Serializes access to all qos changes in the qos domain. */ + struct mutex lock; /* List of all mlx5_esw_rate_groups. */ struct list_head groups; }; +static void esw_qos_lock(struct mlx5_eswitch *esw) +{ + mutex_lock(&esw->qos.domain->lock); +} + +static void esw_qos_unlock(struct mlx5_eswitch *esw) +{ + mutex_unlock(&esw->qos.domain->lock); +} + +static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw) +{ + lockdep_assert_held(&esw->qos.domain->lock); +} + static struct mlx5_qos_domain *esw_qos_domain_alloc(void) { struct mlx5_qos_domain *qos_domain; @@ -25,6 +42,7 @@ static struct mlx5_qos_domain *esw_qos_domain_alloc(void) if (!qos_domain) return NULL; + mutex_init(&qos_domain->lock); INIT_LIST_HEAD(&qos_domain->groups); return qos_domain; @@ -249,7 +267,7 @@ static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport, bool min_rate_supported; int err; - lockdep_assert_held(&esw->state_lock); + esw_assert_qos_lock_held(esw); fw_max_bw_share = MLX5_CAP_QOS(vport->dev, max_tsar_bw_share); min_rate_supported = MLX5_CAP_QOS(vport->dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE; @@ -275,7 +293,7 @@ static int esw_qos_set_vport_max_rate(struct mlx5_vport *vport, bool max_rate_supported; int err; - lockdep_assert_held(&esw->state_lock); + esw_assert_qos_lock_held(esw); max_rate_supported = MLX5_CAP_QOS(vport->dev, esw_rate_limit); if (max_rate && !max_rate_supported) @@ -451,9 +469,7 @@ static int esw_qos_vport_update_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *new_group, *curr_group; int err; - if (!vport->enabled) - return -EINVAL; - + esw_assert_qos_lock_held(esw); curr_group = vport->qos.group; new_group = group ?: esw->qos.group0; if (curr_group == new_group) @@ -552,6 +568,7 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta struct mlx5_esw_rate_group *group; int err; + esw_assert_qos_lock_held(esw); if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) return ERR_PTR(-EOPNOTSUPP); 
@@ -665,8 +682,7 @@ static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { int err = 0; - lockdep_assert_held(&esw->state_lock); - + esw_assert_qos_lock_held(esw); if (!refcount_inc_not_zero(&esw->qos.refcnt)) { /* esw_qos_create() set refcount to 1 only on success. * No need to decrement on failure. @@ -679,7 +695,7 @@ static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) static void esw_qos_put(struct mlx5_eswitch *esw) { - lockdep_assert_held(&esw->state_lock); + esw_assert_qos_lock_held(esw); if (refcount_dec_and_test(&esw->qos.refcnt)) esw_qos_destroy(esw); } @@ -690,7 +706,7 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err; - lockdep_assert_held(&esw->state_lock); + esw_assert_qos_lock_held(esw); if (vport->qos.enabled) return 0; @@ -723,8 +739,9 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) int err; lockdep_assert_held(&esw->state_lock); + esw_qos_lock(esw); if (!vport->qos.enabled) - return; + goto unlock; WARN(vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); @@ -741,6 +758,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) trace_mlx5_esw_vport_qos_destroy(dev, vport); esw_qos_put(esw); +unlock: + esw_qos_unlock(esw); } int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate) @@ -748,17 +767,34 @@ int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_ struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err; - lockdep_assert_held(&esw->state_lock); + esw_qos_lock(esw); err = esw_qos_vport_enable(vport, 0, 0, NULL); if (err) - return err; + goto unlock; err = esw_qos_set_vport_min_rate(vport, min_rate, NULL); if (!err) err = esw_qos_set_vport_max_rate(vport, max_rate, NULL); +unlock: + esw_qos_unlock(esw); return err; } +bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate) +{ + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + bool enabled; + + esw_qos_lock(esw); + enabled = vport->qos.enabled; + if (enabled) { + *max_rate = vport->qos.max_rate; + *min_rate = vport->qos.min_rate; + } + esw_qos_unlock(esw); + return enabled; +} + static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) { struct ethtool_link_ksettings lksettings; @@ -846,7 +882,7 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 return err; } - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); if (!vport->qos.enabled) { /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. 
*/ err = esw_qos_vport_enable(vport, rate_mbps, vport->qos.bw_share, NULL); @@ -861,7 +897,7 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 vport->qos.esw_sched_elem_ix, bitmask); } - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -927,14 +963,14 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void if (err) return err; - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); err = esw_qos_vport_enable(vport, 0, 0, extack); if (err) goto unlock; err = esw_qos_set_vport_min_rate(vport, tx_share, extack); unlock: - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -953,14 +989,14 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * if (err) return err; - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); err = esw_qos_vport_enable(vport, 0, 0, extack); if (err) goto unlock; err = esw_qos_set_vport_max_rate(vport, tx_max, extack); unlock: - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -975,9 +1011,9 @@ int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void if (err) return err; - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); err = esw_qos_set_group_min_rate(group, tx_share, extack); - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -992,9 +1028,9 @@ int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void * if (err) return err; - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); err = esw_qos_set_group_max_rate(group, tx_max, extack); - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -1009,7 +1045,7 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); if (esw->mode != MLX5_ESWITCH_OFFLOADS) { NL_SET_ERR_MSG_MOD(extack, "Rate node creation supported only in switchdev mode"); @@ -1025,7 +1061,7 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, *priv = group; unlock: - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -1036,10 +1072,10 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct mlx5_eswitch *esw = group->esw; int err; - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); err = __esw_qos_destroy_rate_group(group, extack); esw_qos_put(esw); - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } @@ -1055,7 +1091,7 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, return -EOPNOTSUPP; } - mutex_lock(&esw->state_lock); + esw_qos_lock(esw); if (!vport->qos.enabled && !group) goto unlock; @@ -1063,7 +1099,7 @@ int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, if (!err) err = esw_qos_vport_update_group(vport, group, extack); unlock: - mutex_unlock(&esw->state_lock); + esw_qos_unlock(esw); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 44fb339c5dcc8..b4045efbaf9e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -10,6 +10,7 @@ int mlx5_esw_qos_init(struct mlx5_eswitch *esw); void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw); int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate); +bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate); void 
mlx5_esw_qos_vport_disable(struct mlx5_vport *vport); int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9de819c45d335..2bcd42305f46b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2068,6 +2068,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, u16 vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + u32 max_rate, min_rate; if (IS_ERR(evport)) return PTR_ERR(evport); @@ -2082,9 +2083,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->qos = evport->info.qos; ivi->spoofchk = evport->info.spoofchk; ivi->trusted = evport->info.trusted; - if (evport->qos.enabled) { - ivi->min_tx_rate = evport->qos.min_rate; - ivi->max_tx_rate = evport->qos.max_rate; + + if (mlx5_esw_qos_get_vport_rate(evport, &max_rate, &min_rate)) { + ivi->max_tx_rate = max_rate; + ivi->min_tx_rate = min_rate; } mutex_unlock(&esw->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index e57be2eeec85b..3b901bd36d4b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -212,6 +212,7 @@ struct mlx5_vport { struct mlx5_vport_info info; + /* Protected with the E-Switch qos domain lock. */ struct { /* Initially false, set to true whenever any QoS features are used. */ bool enabled; @@ -363,10 +364,9 @@ struct mlx5_eswitch { struct rw_semaphore mode_lock; atomic64_t user_count; + /* Protected with the E-Switch qos domain lock. */ struct { - /* Protected by esw->state_lock. - * Initially 0, meaning no QoS users and QoS is disabled. - */ + /* Initially 0, meaning no QoS users and QoS is disabled. */ refcount_t refcnt; u32 root_tsar_ix; struct mlx5_qos_domain *domain; From f91c69f43c545f46f256d907d55d24d06a4ac8f5 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 8 Oct 2024 21:32:21 +0300 Subject: [PATCH 13/14] net/mlx5: Unify QoS element type checks across NIC and E-Switch Refactor the QoS element type support check by introducing a new function, mlx5_qos_element_type_supported(), which handles element type validation for both NIC and E-Switch schedulers. This change removes the redundant esw_qos_element_type_supported() function and unifies the element type checks into a single implementation. 
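For example (a sketch, not from the patch; the wrapper name is hypothetical), one predicate now answers the question for either scheduler:

/* Hypothetical wrapper: the same check covers both hierarchies; only
 * the capability field consulted differs (nic_element_type vs
 * esw_element_type).
 */
static bool dev_supports_tsar_elements(struct mlx5_core_dev *dev)
{
	return mlx5_qos_element_type_supported(dev,
					       SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					       SCHEDULING_HIERARCHY_NIC) &&
	       mlx5_qos_element_type_supported(dev,
					       SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					       SCHEDULING_HIERARCHY_E_SWITCH);
}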
Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 27 ++++------------ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/qos.c | 8 +++-- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 31 +++++++++++++++++++ 4 files changed, 44 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index be9abeb6e4aa0..ea68d86ea6ea8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -371,25 +371,6 @@ static int esw_qos_set_group_max_rate(struct mlx5_esw_rate_group *group, return err; } -static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) -{ - switch (type) { - case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_TSAR; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_VPORT_TC; - case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: - return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; - } - return false; -} - static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, u32 max_rate, u32 bw_share) { @@ -399,7 +380,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, void *attr; int err; - if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT)) + if (!mlx5_qos_element_type_supported(dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, + SCHEDULING_HIERARCHY_E_SWITCH)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, element_type, @@ -616,7 +599,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; - if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) || + if (!mlx5_qos_element_type_supported(dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, + SCHEDULING_HIERARCHY_E_SWITCH) || !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 62c770b0eaa83..5bb62051adc2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -224,6 +224,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy); int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, void *context, u32 *element_id); int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c index db2bd3ad63ba3..4d353da3eb7b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -28,7 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev 
*mdev, u32 parent_id, { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; - if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP)) + if (!mlx5_qos_element_type_supported(mdev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP, + SCHEDULING_HIERARCHY_NIC)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); @@ -47,7 +49,9 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0}; void *attr; - if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) || + if (!mlx5_qos_element_type_supported(mdev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, + SCHEDULING_HIERARCHY_NIC) || !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index 9f8b4005f4bd0..efadd575fb35d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -34,6 +34,37 @@ #include #include "mlx5_core.h" +bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy) +{ + int cap; + + switch (hierarchy) { + case SCHEDULING_HIERARCHY_E_SWITCH: + cap = MLX5_CAP_QOS(dev, esw_element_type); + break; + case SCHEDULING_HIERARCHY_NIC: + cap = MLX5_CAP_QOS(dev, nic_element_type); + break; + default: + return false; + } + + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return cap & ELEMENT_TYPE_CAP_MASK_TSAR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return cap & ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return cap & ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return cap & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP: + return cap & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP; + } + + return false; +} + /* Scheduling element fw management */ int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, void *ctx, u32 *element_id) From e1013c792960324d9780f11acd88c5b2ed7747c5 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Tue, 8 Oct 2024 21:32:22 +0300 Subject: [PATCH 14/14] net/mlx5: Add support check for TSAR types in QoS scheduling Introduce a new function, mlx5_qos_tsar_type_supported(), to handle the validation of TSAR types within QoS scheduling contexts. Refactor the existing code to use this new function, replacing direct checks for TSAR type support in the NIC scheduling hierarchy. 
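Usage sketch (hypothetical caller, not part of the patch): probing for a different TSAR flavour follows the same shape as the DWRR checks converted below:

/* Hypothetical guard: refuse to build an ETS arbiter unless the NIC
 * scheduling hierarchy advertises ETS TSAR support.
 */
static int probe_nic_ets_tsar(struct mlx5_core_dev *mdev)
{
	if (!mlx5_qos_tsar_type_supported(mdev,
					  TSAR_ELEMENT_TSAR_TYPE_ETS,
					  SCHEDULING_HIERARCHY_NIC))
		return -EOPNOTSUPP;
	return 0;
}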
Signed-off-by: Carolina Jubran Signed-off-by: Tariq Toukan Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 4 ++- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/qos.c | 4 ++- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 27 +++++++++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index ea68d86ea6ea8..ee6f76a6f0b57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -602,7 +602,9 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (!mlx5_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, SCHEDULING_HIERARCHY_E_SWITCH) || - !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) + !mlx5_qos_tsar_type_supported(dev, + TSAR_ELEMENT_TSAR_TYPE_DWRR, + SCHEDULING_HIERARCHY_E_SWITCH)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, tsar_ctx, element_type, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 5bb62051adc2e..99de67c3aa743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -225,6 +225,7 @@ int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy); +bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy); int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, void *context, u32 *element_id); int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c index 4d353da3eb7b0..6be9981bb6b1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c @@ -52,7 +52,9 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id, if (!mlx5_qos_element_type_supported(mdev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, SCHEDULING_HIERARCHY_NIC) || - !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) + !mlx5_qos_tsar_type_supported(mdev, + TSAR_ELEMENT_TSAR_TYPE_DWRR, + SCHEDULING_HIERARCHY_NIC)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index efadd575fb35d..e393391966e0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -34,6 +34,33 @@ #include #include "mlx5_core.h" +bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy) +{ + int cap; + + switch (hierarchy) { + case SCHEDULING_HIERARCHY_E_SWITCH: + cap = MLX5_CAP_QOS(dev, esw_tsar_type); + break; + case SCHEDULING_HIERARCHY_NIC: + cap = MLX5_CAP_QOS(dev, nic_tsar_type); + break; + default: + return false; + } + + switch (type) { + case TSAR_ELEMENT_TSAR_TYPE_DWRR: + return cap & TSAR_TYPE_CAP_MASK_DWRR; + case TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN: + return cap & TSAR_TYPE_CAP_MASK_ROUND_ROBIN; + case TSAR_ELEMENT_TSAR_TYPE_ETS: + return cap & 
TSAR_TYPE_CAP_MASK_ETS; + } + + return false; +} + bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy) { int cap;