diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index f8869c9b68029..86af1891395f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -187,7 +187,7 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx
 	return err;
 }
 
-void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport)
 {
 	struct mlx5_devlink_port *dl_port;
 
@@ -195,7 +195,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct
 		return;
 	dl_port = vport->dl_port;
 
-	mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+	mlx5_esw_qos_vport_update_group(vport, NULL, NULL);
 	devl_rate_leaf_destroy(&dl_port->dl_port);
 
 	devl_port_unregister(&dl_port->dl_port);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
index 1ce332f21ebe9..645bad0d625f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
@@ -11,52 +11,55 @@
 #include "eswitch.h"
 
 TRACE_EVENT(mlx5_esw_vport_qos_destroy,
-	    TP_PROTO(const struct mlx5_vport *vport),
-	    TP_ARGS(vport),
-	    TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+	    TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport),
+	    TP_ARGS(dev, vport),
+	    TP_STRUCT__entry(__string(devname, dev_name(dev->device))
 			     __field(unsigned short, vport_id)
-			     __field(unsigned int, tsar_ix)
+			     __field(unsigned int, sched_elem_ix)
 			     ),
 	    TP_fast_assign(__assign_str(devname);
 			   __entry->vport_id = vport->vport;
-			   __entry->tsar_ix = vport->qos.esw_tsar_ix;
+			   __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix;
 			   ),
-	    TP_printk("(%s) vport=%hu tsar_ix=%u\n",
-		      __get_str(devname), __entry->vport_id, __entry->tsar_ix
+	    TP_printk("(%s) vport=%hu sched_elem_ix=%u\n",
+		      __get_str(devname), __entry->vport_id, __entry->sched_elem_ix
 		      )
);

DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
-		    TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
-		    TP_ARGS(vport, bw_share, max_rate),
-		    TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+		    TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport,
+			     u32 bw_share, u32 max_rate),
+		    TP_ARGS(dev, vport, bw_share, max_rate),
+		    TP_STRUCT__entry(__string(devname, dev_name(dev->device))
				     __field(unsigned short, vport_id)
-				     __field(unsigned int, tsar_ix)
+				     __field(unsigned int, sched_elem_ix)
				     __field(unsigned int, bw_share)
				     __field(unsigned int, max_rate)
				     __field(void *, group)
				     ),
		    TP_fast_assign(__assign_str(devname);
				   __entry->vport_id = vport->vport;
-				   __entry->tsar_ix = vport->qos.esw_tsar_ix;
+				   __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix;
				   __entry->bw_share = bw_share;
				   __entry->max_rate = max_rate;
				   __entry->group = vport->qos.group;
				   ),
-		    TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
-			      __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+		    TP_printk("(%s) vport=%hu sched_elem_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+			      __get_str(devname), __entry->vport_id, __entry->sched_elem_ix,
			      __entry->bw_share, __entry->max_rate, __entry->group
			      )
);

DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
-	     TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
-	     TP_ARGS(vport, bw_share, max_rate)
+	     TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport,
+		      u32 bw_share, u32 max_rate),
+	     TP_ARGS(dev, vport, bw_share, max_rate)
);

DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
-	     TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
-	     TP_ARGS(vport, bw_share, max_rate)
+	     TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport,
+		      u32 bw_share, u32 max_rate),
+	     TP_ARGS(dev, vport, bw_share, max_rate)
);

DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 8587cd572da53..288c797e4a78a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -513,15 +513,11 @@ int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 				u32 max_rate, u32 min_rate)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
-	int err;
 
 	if (!mlx5_esw_allowed(esw))
 		return -EPERM;
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
 
-	mutex_lock(&esw->state_lock);
-	err = mlx5_esw_qos_set_vport_rate(esw, evport, max_rate, min_rate);
-	mutex_unlock(&esw->state_lock);
-	return err;
+	return mlx5_esw_qos_set_vport_rate(evport, max_rate, min_rate);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 02a3563f51ad2..ee6f76a6f0b57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -11,20 +11,82 @@
 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
 #define MLX5_MIN_BW_SHARE 1
 
-#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
-	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+/* Holds rate groups associated with an E-Switch. */
+struct mlx5_qos_domain {
+	/* Serializes access to all qos changes in the qos domain. */
+	struct mutex lock;
+	/* List of all mlx5_esw_rate_groups. */
+	struct list_head groups;
+};
+
+static void esw_qos_lock(struct mlx5_eswitch *esw)
+{
+	mutex_lock(&esw->qos.domain->lock);
+}
+
+static void esw_qos_unlock(struct mlx5_eswitch *esw)
+{
+	mutex_unlock(&esw->qos.domain->lock);
+}
+
+static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
+{
+	lockdep_assert_held(&esw->qos.domain->lock);
+}
+
+static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
+{
+	struct mlx5_qos_domain *qos_domain;
+
+	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
+	if (!qos_domain)
+		return NULL;
+
+	mutex_init(&qos_domain->lock);
+	INIT_LIST_HEAD(&qos_domain->groups);
+
+	return qos_domain;
+}
+
+static int esw_qos_domain_init(struct mlx5_eswitch *esw)
+{
+	esw->qos.domain = esw_qos_domain_alloc();
+
+	return esw->qos.domain ? 0 : -ENOMEM;
+}
+
+static void esw_qos_domain_release(struct mlx5_eswitch *esw)
+{
+	kfree(esw->qos.domain);
+	esw->qos.domain = NULL;
+}
 
 struct mlx5_esw_rate_group {
 	u32 tsar_ix;
+	/* Bandwidth parameters. */
 	u32 max_rate;
 	u32 min_rate;
+	/* A computed value indicating relative min_rate between group members. */
 	u32 bw_share;
-	struct list_head list;
+	/* Membership in the qos domain 'groups' list. */
+	struct list_head parent_entry;
+	/* The eswitch this group belongs to. */
+	struct mlx5_eswitch *esw;
+	/* Vport members of this group. */
+	struct list_head members;
 };
 
-static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
-			       u32 tsar_ix, u32 max_rate, u32 bw_share)
+static void esw_qos_vport_set_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group)
+{
+	list_del_init(&vport->qos.group_entry);
+	vport->qos.group = group;
+	list_add_tail(&vport->qos.group_entry, &group->members);
+}
+
+static int esw_qos_sched_elem_config(struct mlx5_core_dev *dev, u32 sched_elem_ix,
+				     u32 max_rate, u32 bw_share)
 {
+	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	u32 bitmask = 0;
 
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
@@ -38,20 +100,17 @@ static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
 	return mlx5_modify_scheduling_element_cmd(dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
 						  sched_ctx,
-						  tsar_ix,
+						  sched_elem_ix,
 						  bitmask);
 }
 
-static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+static int esw_qos_group_config(struct mlx5_esw_rate_group *group,
 				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
 {
-	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
-	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_core_dev *dev = group->esw->dev;
 	int err;
 
-	err = esw_qos_tsar_config(dev, sched_ctx,
-				  group->tsar_ix,
-				  max_rate, bw_share);
+	err = esw_qos_sched_elem_config(dev, group->tsar_ix, max_rate, bw_share);
 	if (err)
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
 
@@ -60,122 +119,129 @@ static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_g
 	return err;
 }
 
-static int esw_qos_vport_config(struct mlx5_eswitch *esw,
-				struct mlx5_vport *vport,
+static int esw_qos_vport_config(struct mlx5_vport *vport,
 				u32 max_rate, u32 bw_share,
 				struct netlink_ext_ack *extack)
 {
-	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
-	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_core_dev *dev = vport->qos.group->esw->dev;
 	int err;
 
-	if (!vport->qos.enabled)
-		return -EIO;
-
-	err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
-				  max_rate, bw_share);
+	err = esw_qos_sched_elem_config(dev, vport->qos.esw_sched_elem_ix, max_rate, bw_share);
 	if (err) {
-		esw_warn(esw->dev,
-			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+		esw_warn(dev,
+			 "E-Switch modify vport scheduling element failed (vport=%d,err=%d)\n",
 			 vport->vport, err);
-		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify vport scheduling element failed");
 		return err;
 	}
 
-	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+	trace_mlx5_esw_vport_qos_config(dev, vport, bw_share, max_rate);
 
 	return 0;
 }
 
-static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
-					      struct mlx5_esw_rate_group *group,
-					      bool group_level)
+static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_esw_rate_group *group)
 {
-	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-	struct mlx5_vport *evport;
+	u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share);
+	struct mlx5_vport *vport;
 	u32 max_guarantee = 0;
-	unsigned long i;
-
-	if (group_level) {
-		struct mlx5_esw_rate_group *group;
-
-		list_for_each_entry(group, &esw->qos.groups, list) {
-			if (group->min_rate < max_guarantee)
-				continue;
-			max_guarantee = group->min_rate;
-		}
-	} else {
-		mlx5_esw_for_each_vport(esw, i, evport) {
-			if (!evport->enabled || !evport->qos.enabled ||
-			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
-				continue;
-			max_guarantee = evport->qos.min_rate;
-		}
+	/* Find max min_rate across all vports in this group.
+	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
+	 */
+	list_for_each_entry(vport, &group->members, qos.group_entry) {
+		if (vport->qos.min_rate > max_guarantee)
+			max_guarantee = vport->qos.min_rate;
 	}
 
 	if (max_guarantee)
 		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
 
-	/* If vports min rate divider is 0 but their group has bw_share configured, then
-	 * need to set bw_share for vports to minimal value.
+	/* If vports max min_rate divider is 0 but their group has bw_share
+	 * configured, then set bw_share for vports to minimal value.
 	 */
-	if (!group_level && !max_guarantee && group && group->bw_share)
+	if (group->bw_share)
 		return 1;
+
+	/* A divider of 0 sets bw_share for all group vports to 0,
+	 * effectively disabling min guarantees.
+	 */
 	return 0;
 }
 
-static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw)
 {
-	if (divider)
-		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
+	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+	struct mlx5_esw_rate_group *group;
+	u32 max_guarantee = 0;
 
+	/* Find max min_rate across all esw groups.
+	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
+	 */
+	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+		if (group->esw == esw && group->tsar_ix != esw->qos.root_tsar_ix &&
+		    group->min_rate > max_guarantee)
+			max_guarantee = group->min_rate;
+	}
+	if (max_guarantee)
+		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+	/* If no group has min_rate configured, a divider of 0 sets all
+	 * groups' bw_share to 0, effectively disabling min guarantees.
+	 */
 	return 0;
 }
 
-static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
-					     struct mlx5_esw_rate_group *group,
-					     struct netlink_ext_ack *extack)
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+	if (!divider)
+		return 0;
+	return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max);
+}
+
+static int esw_qos_normalize_group_min_rate(struct mlx5_esw_rate_group *group,
+					    struct netlink_ext_ack *extack)
 {
-	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
-	struct mlx5_vport *evport;
-	unsigned long i;
+	u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share);
+	u32 divider = esw_qos_calculate_group_min_rate_divider(group);
+	struct mlx5_vport *vport;
 	u32 bw_share;
 	int err;
 
-	mlx5_esw_for_each_vport(esw, i, evport) {
-		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
-			continue;
-		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+	list_for_each_entry(vport, &group->members, qos.group_entry) {
+		bw_share = esw_qos_calc_bw_share(vport->qos.min_rate, divider, fw_max_bw_share);
 
-		if (bw_share == evport->qos.bw_share)
+		if (bw_share == vport->qos.bw_share)
 			continue;
 
-		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+		err = esw_qos_vport_config(vport, vport->qos.max_rate, bw_share, extack);
 		if (err)
 			return err;
 
-		evport->qos.bw_share = bw_share;
+		vport->qos.bw_share = bw_share;
 	}
 
 	return 0;
 }
 
-static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
-					     struct netlink_ext_ack *extack)
+static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
 {
 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+	u32 divider = esw_qos_calculate_min_rate_divider(esw);
 	struct mlx5_esw_rate_group *group;
 	u32 bw_share;
 	int err;
 
-	list_for_each_entry(group, &esw->qos.groups, list) {
+	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+		if (group->esw != esw || group->tsar_ix == esw->qos.root_tsar_ix)
+			continue;
 		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
 
 		if (bw_share == group->bw_share)
 			continue;
 
-		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+		err = esw_qos_group_config(group, group->max_rate, bw_share, extack);
 		if (err)
 			return err;
 
@@ -184,7 +250,7 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
 		/* All the group's vports need to be set with default bw_share
 		 * to enable them with QOS
 		 */
-		err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+		err = esw_qos_normalize_group_min_rate(group, extack);
 		if (err)
 			return err;
 
@@ -193,69 +259,69 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
 	return 0;
 }
 
-static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport,
 				      u32 min_rate, struct netlink_ext_ack *extack)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	u32 fw_max_bw_share, previous_min_rate;
 	bool min_rate_supported;
 	int err;
 
-	lockdep_assert_held(&esw->state_lock);
-	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+	esw_assert_qos_lock_held(esw);
+	fw_max_bw_share = MLX5_CAP_QOS(vport->dev, max_tsar_bw_share);
+	min_rate_supported = MLX5_CAP_QOS(vport->dev, esw_bw_share) &&
 			     fw_max_bw_share >= MLX5_MIN_BW_SHARE;
 	if (min_rate && !min_rate_supported)
 		return -EOPNOTSUPP;
-	if (min_rate == evport->qos.min_rate)
+	if (min_rate == vport->qos.min_rate)
 		return 0;
 
-	previous_min_rate = evport->qos.min_rate;
-	evport->qos.min_rate = min_rate;
-	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+	previous_min_rate = vport->qos.min_rate;
+	vport->qos.min_rate = min_rate;
+	err = esw_qos_normalize_group_min_rate(vport->qos.group, extack);
 	if (err)
-		evport->qos.min_rate = previous_min_rate;
+		vport->qos.min_rate = previous_min_rate;
 
 	return err;
 }
 
-static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
+static int esw_qos_set_vport_max_rate(struct mlx5_vport *vport,
 				      u32 max_rate, struct netlink_ext_ack *extack)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	u32 act_max_rate = max_rate;
 	bool max_rate_supported;
 	int err;
 
-	lockdep_assert_held(&esw->state_lock);
-	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+	esw_assert_qos_lock_held(esw);
+	max_rate_supported = MLX5_CAP_QOS(vport->dev, esw_rate_limit);
 
 	if (max_rate && !max_rate_supported)
 		return -EOPNOTSUPP;
-	if (max_rate == evport->qos.max_rate)
+	if (max_rate == vport->qos.max_rate)
 		return 0;
 
-	/* If parent group has rate limit need to set to group
-	 * value when new max rate is 0.
-	 */
-	if (evport->qos.group && !max_rate)
-		act_max_rate = evport->qos.group->max_rate;
+	/* Use parent group limit if new max rate is 0. */
+	if (!max_rate)
+		act_max_rate = vport->qos.group->max_rate;
 
-	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+	err = esw_qos_vport_config(vport, act_max_rate, vport->qos.bw_share, extack);
 
 	if (!err)
-		evport->qos.max_rate = max_rate;
+		vport->qos.max_rate = max_rate;
 
 	return err;
 }
 
-static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+static int esw_qos_set_group_min_rate(struct mlx5_esw_rate_group *group,
 				      u32 min_rate, struct netlink_ext_ack *extack)
 {
-	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
-	struct mlx5_core_dev *dev = esw->dev;
-	u32 previous_min_rate, divider;
+	struct mlx5_eswitch *esw = group->esw;
+	u32 previous_min_rate;
 	int err;
 
-	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+	if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) ||
+	    MLX5_CAP_QOS(esw->dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE)
 		return -EOPNOTSUPP;
 
 	if (min_rate == group->min_rate)
@@ -263,47 +329,40 @@ static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_
 		return 0;
 
 	previous_min_rate = group->min_rate;
 	group->min_rate = min_rate;
-	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
-	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+	err = esw_qos_normalize_min_rate(esw, extack);
 	if (err) {
-		group->min_rate = previous_min_rate;
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
 
 		/* Attempt restoring previous configuration */
-		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
-		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+		group->min_rate = previous_min_rate;
+		if (esw_qos_normalize_min_rate(esw, extack))
 			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
 	}
 
 	return err;
 }
 
-static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
-				      struct mlx5_esw_rate_group *group,
+static int esw_qos_set_group_max_rate(struct mlx5_esw_rate_group *group,
 				      u32 max_rate, struct netlink_ext_ack *extack)
 {
 	struct mlx5_vport *vport;
-	unsigned long i;
 	int err;
 
 	if (group->max_rate == max_rate)
 		return 0;
 
-	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+	err = esw_qos_group_config(group, max_rate, group->bw_share, extack);
 	if (err)
 		return err;
 
 	group->max_rate = max_rate;
 
-	/* Any unlimited vports in the group should be set
-	 * with the value of the group.
-	 */
-	mlx5_esw_for_each_vport(esw, i, vport) {
-		if (!vport->enabled || !vport->qos.enabled ||
-		    vport->qos.group != group || vport->qos.max_rate)
+	/* Any unlimited vports in the group should be set with the value of the group. */
+	list_for_each_entry(vport, &group->members, qos.group_entry) {
+		if (vport->qos.max_rate)
 			continue;
 
-		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+		err = esw_qos_vport_config(vport, max_rate, vport->qos.bw_share, extack);
 		if (err)
 			NL_SET_ERR_MSG_MOD(extack,
 					   "E-Switch vport implicit rate limit setting failed");
@@ -312,54 +371,35 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
 	}
 
 	return err;
 }
 
-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
-{
-	switch (type) {
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_TSAR;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_VPORT;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
-	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
-		return MLX5_CAP_QOS(dev, esw_element_type) &
-		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
-	}
-	return false;
-}
-
-static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
-					      struct mlx5_vport *vport,
+static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport,
 					      u32 max_rate, u32 bw_share)
 {
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	struct mlx5_esw_rate_group *group = vport->qos.group;
-	struct mlx5_core_dev *dev = esw->dev;
-	u32 parent_tsar_ix;
-	void *vport_elem;
+	struct mlx5_core_dev *dev = group->esw->dev;
+	void *attr;
 	int err;
 
-	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
+	if (!mlx5_qos_element_type_supported(dev,
+					     SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
+					     SCHEDULING_HIERARCHY_E_SWITCH))
 		return -EOPNOTSUPP;
 
-	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
 	MLX5_SET(scheduling_context, sched_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
-	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
-	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
-	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+	MLX5_SET(vport_element, attr, vport_number, vport->vport);
+	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, group->tsar_ix);
 	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
 	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
 
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
 						 sched_ctx,
-						 &vport->qos.esw_tsar_ix);
+						 &vport->qos.esw_sched_elem_ix);
 	if (err) {
-		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+		esw_warn(dev,
+			 "E-Switch create vport scheduling element failed (vport=%d,err=%d)\n",
 			 vport->vport, err);
 		return err;
 	}
@@ -367,8 +407,7 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
 
 	return 0;
 }
 
-static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
-						   struct mlx5_vport *vport,
+static int esw_qos_update_group_scheduling_element(struct mlx5_vport *vport,
 						   struct mlx5_esw_rate_group *curr_group,
 						   struct mlx5_esw_rate_group *new_group,
 						   struct netlink_ext_ack *extack)
@@ -376,22 +415,18 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
 	u32 max_rate;
 	int err;
 
-	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+	err = mlx5_destroy_scheduling_element_cmd(curr_group->esw->dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
-						  vport->qos.esw_tsar_ix);
+						  vport->qos.esw_sched_elem_ix);
 	if (err) {
-		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy vport scheduling element failed");
 		return err;
 	}
 
-	vport->qos.group = new_group;
+	esw_qos_vport_set_group(vport, new_group);
+	/* Use new group max rate if vport max rate is unlimited. */
 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
-
-	/* If vport is unlimited, we set the group's value.
-	 * Therefore, if the group is limited it will apply to
-	 * the vport as well and if not, vport will remain unlimited.
-	 */
-	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+	err = esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
 		goto err_sched;
@@ -400,96 +435,110 @@ static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
 	}
 
 	return 0;
 
 err_sched:
-	vport->qos.group = curr_group;
+	esw_qos_vport_set_group(vport, curr_group);
 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
-	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
-		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+	if (esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share))
+		esw_warn(curr_group->esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
 			 vport->vport);
 
 	return err;
 }
 
-static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
-				      struct mlx5_vport *vport,
+static int esw_qos_vport_update_group(struct mlx5_vport *vport,
 				      struct mlx5_esw_rate_group *group,
 				      struct netlink_ext_ack *extack)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	struct mlx5_esw_rate_group *new_group, *curr_group;
 	int err;
 
-	if (!vport->enabled)
-		return -EINVAL;
-
+	esw_assert_qos_lock_held(esw);
 	curr_group = vport->qos.group;
 	new_group = group ?: esw->qos.group0;
 	if (curr_group == new_group)
 		return 0;
 
-	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+	err = esw_qos_update_group_scheduling_element(vport, curr_group, new_group, extack);
 	if (err)
 		return err;
 
 	/* Recalculate bw share weights of old and new groups */
 	if (vport->qos.bw_share || new_group->bw_share) {
-		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
-		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+		esw_qos_normalize_group_min_rate(curr_group, extack);
+		esw_qos_normalize_group_min_rate(new_group, extack);
 	}
 
 	return 0;
 }
 
 static struct mlx5_esw_rate_group *
-__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+__esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix)
 {
-	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	struct mlx5_esw_rate_group *group;
-	__be32 *attr;
-	u32 divider;
-	int err;
 
 	group = kzalloc(sizeof(*group), GFP_KERNEL);
 	if (!group)
-		return ERR_PTR(-ENOMEM);
+		return NULL;
 
-	MLX5_SET(scheduling_context, tsar_ctx, element_type,
-		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+	group->esw = esw;
+	group->tsar_ix = tsar_ix;
+	INIT_LIST_HEAD(&group->members);
+	list_add_tail(&group->parent_entry, &esw->qos.domain->groups);
+	return group;
+}
 
-	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
-	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+static void __esw_qos_free_rate_group(struct mlx5_esw_rate_group *group)
+{
+	list_del(&group->parent_entry);
+	kfree(group);
+}
+
+static struct mlx5_esw_rate_group *
+__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+	struct mlx5_esw_rate_group *group;
+	int tsar_ix, err;
+	void *attr;
 
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
 		 esw->qos.root_tsar_ix);
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
 	err = mlx5_create_scheduling_element_cmd(esw->dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
 						 tsar_ctx,
-						 &group->tsar_ix);
+						 &tsar_ix);
 	if (err) {
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
-		goto err_sched_elem;
+		return ERR_PTR(err);
 	}
 
-	list_add_tail(&group->list, &esw->qos.groups);
+	group = __esw_qos_alloc_rate_group(esw, tsar_ix);
+	if (!group) {
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc group failed");
+		err = -ENOMEM;
+		goto err_alloc_group;
+	}
 
-	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
-	if (divider) {
-		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
-		if (err) {
-			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
-			goto err_min_rate;
-		}
+	err = esw_qos_normalize_min_rate(esw, extack);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+		goto err_min_rate;
 	}
 	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
 
 	return group;
 
 err_min_rate:
-	list_del(&group->list);
+	__esw_qos_free_rate_group(group);
+err_alloc_group:
 	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
 						SCHEDULING_HIERARCHY_E_SWITCH,
-						group->tsar_ix))
+						tsar_ix))
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
-err_sched_elem:
-	kfree(group);
 	return ERR_PTR(err);
 }
 
@@ -502,6 +551,7 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 	struct mlx5_esw_rate_group *group;
 	int err;
 
+	esw_assert_qos_lock_held(esw);
 	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
 		return ERR_PTR(-EOPNOTSUPP);
 
@@ -516,41 +566,25 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 	return group;
 }
 
-static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
-					struct mlx5_esw_rate_group *group,
+static int __esw_qos_destroy_rate_group(struct mlx5_esw_rate_group *group,
 					struct netlink_ext_ack *extack)
 {
-	u32 divider;
+	struct mlx5_eswitch *esw = group->esw;
 	int err;
 
-	list_del(&group->list);
-
-	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
-	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
-	if (err)
-		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
 
 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
 						  group->tsar_ix);
 	if (err)
 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+	__esw_qos_free_rate_group(group);
 
-	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
-
-	kfree(group);
-
-	return err;
-}
-
-static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
-				      struct mlx5_esw_rate_group *group,
-				      struct netlink_ext_ack *extack)
-{
-	int err;
+	err = esw_qos_normalize_min_rate(esw, extack);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
 
-	err = __esw_qos_destroy_rate_group(esw, group, extack);
-	esw_qos_put(esw);
 	return err;
 }
 
@@ -559,21 +593,25 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
 	struct mlx5_core_dev *dev = esw->dev;
-	__be32 *attr;
+	void *attr;
 	int err;
 
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
 		return -EOPNOTSUPP;
 
-	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
-	    !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+	if (!mlx5_qos_element_type_supported(dev,
+					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
+					     SCHEDULING_HIERARCHY_E_SWITCH) ||
+	    !mlx5_qos_tsar_type_supported(dev,
+					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
+					  SCHEDULING_HIERARCHY_E_SWITCH))
 		return -EOPNOTSUPP;
 
 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
 
 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
-	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
 
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
@@ -584,15 +622,19 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 		return err;
 	}
 
-	INIT_LIST_HEAD(&esw->qos.groups);
 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
 		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
-		if (IS_ERR(esw->qos.group0)) {
-			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
-				 PTR_ERR(esw->qos.group0));
-			err = PTR_ERR(esw->qos.group0);
-			goto err_group0;
-		}
+	} else {
+		/* The eswitch doesn't support scheduling groups.
+		 * Create a software-only group0 using the root TSAR to attach vport QoS to.
+		 */
+		if (!__esw_qos_alloc_rate_group(esw, esw->qos.root_tsar_ix))
+			esw->qos.group0 = ERR_PTR(-ENOMEM);
+	}
+	if (IS_ERR(esw->qos.group0)) {
+		err = PTR_ERR(esw->qos.group0);
+		esw_warn(dev, "E-Switch create rate group 0 failed (%d)\n", err);
+		goto err_group0;
 	}
 
 	refcount_set(&esw->qos.refcnt, 1);
@@ -610,8 +652,11 @@ static void esw_qos_destroy(struct mlx5_eswitch *esw)
 {
 	int err;
 
-	if (esw->qos.group0)
-		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+	if (esw->qos.group0->tsar_ix != esw->qos.root_tsar_ix)
+		__esw_qos_destroy_rate_group(esw->qos.group0, NULL);
+	else
+		__esw_qos_free_rate_group(esw->qos.group0);
+	esw->qos.group0 = NULL;
 
 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
@@ -624,8 +669,7 @@ static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
 {
 	int err = 0;
 
-	lockdep_assert_held(&esw->state_lock);
-
+	esw_assert_qos_lock_held(esw);
 	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
 		/* esw_qos_create() set refcount to 1 only on success.
 		 * No need to decrement on failure.
@@ -638,17 +682,18 @@ static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
 
 static void esw_qos_put(struct mlx5_eswitch *esw)
 {
-	lockdep_assert_held(&esw->state_lock);
+	esw_assert_qos_lock_held(esw);
 	if (refcount_dec_and_test(&esw->qos.refcnt))
 		esw_qos_destroy(esw);
 }
 
-static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+static int esw_qos_vport_enable(struct mlx5_vport *vport,
 				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	int err;
 
-	lockdep_assert_held(&esw->state_lock);
+	esw_assert_qos_lock_held(esw);
 	if (vport->qos.enabled)
 		return 0;
 
@@ -656,14 +701,15 @@ static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
 	if (err)
 		return err;
 
-	vport->qos.group = esw->qos.group0;
+	INIT_LIST_HEAD(&vport->qos.group_entry);
+	esw_qos_vport_set_group(vport, esw->qos.group0);
 
-	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+	err = esw_qos_vport_create_sched_element(vport, max_rate, bw_share);
 	if (err)
 		goto err_out;
 
 	vport->qos.enabled = true;
-	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+	trace_mlx5_esw_vport_qos_create(vport->dev, vport, bw_share, max_rate);
 
 	return 0;
 
@@ -673,46 +719,69 @@ static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
 	return err;
 }
 
-void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+	struct mlx5_core_dev *dev;
 	int err;
 
 	lockdep_assert_held(&esw->state_lock);
+	esw_qos_lock(esw);
 	if (!vport->qos.enabled)
-		return;
-	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+		goto unlock;
+	WARN(vport->qos.group != esw->qos.group0,
 	     "Disabling QoS on port before detaching it from group");
 
-	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+	dev = vport->qos.group->esw->dev;
+	err = mlx5_destroy_scheduling_element_cmd(dev,
 						  SCHEDULING_HIERARCHY_E_SWITCH,
-						  vport->qos.esw_tsar_ix);
+						  vport->qos.esw_sched_elem_ix);
 	if (err)
-		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+		esw_warn(dev,
			 "E-Switch destroy vport scheduling element failed (vport=%d,err=%d)\n",
 			 vport->vport, err);
 
 	memset(&vport->qos, 0, sizeof(vport->qos));
-	trace_mlx5_esw_vport_qos_destroy(vport);
+	trace_mlx5_esw_vport_qos_destroy(dev, vport);
 
 	esw_qos_put(esw);
+unlock:
+	esw_qos_unlock(esw);
 }
 
-int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
-				u32 max_rate, u32 min_rate)
+int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	int err;
 
-	lockdep_assert_held(&esw->state_lock);
-	err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
+	esw_qos_lock(esw);
+	err = esw_qos_vport_enable(vport, 0, 0, NULL);
 	if (err)
-		return err;
+		goto unlock;
 
-	err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
+	err = esw_qos_set_vport_min_rate(vport, min_rate, NULL);
 	if (!err)
-		err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
-
+		err = esw_qos_set_vport_max_rate(vport, max_rate, NULL);
+unlock:
+	esw_qos_unlock(esw);
 	return err;
 }
 
+bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
+{
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+	bool enabled;
+
+	esw_qos_lock(esw);
+	enabled = vport->qos.enabled;
+	if (enabled) {
+		*max_rate = vport->qos.max_rate;
+		*min_rate = vport->qos.min_rate;
+	}
+	esw_qos_unlock(esw);
+	return enabled;
+}
+
 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
 {
 	struct ethtool_link_ksettings lksettings;
@@ -800,21 +869,22 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32
 		return err;
 	}
 
-	mutex_lock(&esw->state_lock);
+	esw_qos_lock(esw);
 	if (!vport->qos.enabled) {
 		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
-		err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
+		err = esw_qos_vport_enable(vport, rate_mbps, vport->qos.bw_share, NULL);
 	} else {
-		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+		struct mlx5_core_dev *dev = vport->qos.group->esw->dev;
 
+		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
 		bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
-		err = mlx5_modify_scheduling_element_cmd(esw->dev,
+		err = mlx5_modify_scheduling_element_cmd(dev,
 							 SCHEDULING_HIERARCHY_E_SWITCH,
 							 ctx,
-							 vport->qos.esw_tsar_ix,
+							 vport->qos.esw_sched_elem_ix,
 							 bitmask);
 	}
-	mutex_unlock(&esw->state_lock);
+	esw_qos_unlock(esw);
 
 	return err;
 }
@@ -852,6 +922,17 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
 	return 0;
 }
 
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
+{
+	return esw_qos_domain_init(esw);
+}
+
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
+{
+	if (esw->qos.domain)
+		esw_qos_domain_release(esw);
+}
+
 /* Eswitch devlink rate API */
 
 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
@@ -869,14 +950,14 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void
 	if (err)
 		return err;
 
-	mutex_lock(&esw->state_lock);
-	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+	esw_qos_lock(esw);
+	err = esw_qos_vport_enable(vport, 0, 0, extack);
 	if (err)
 		goto unlock;
 
-	err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+	err = esw_qos_set_vport_min_rate(vport, tx_share, extack);
 unlock:
-	mutex_unlock(&esw->state_lock);
+	esw_qos_unlock(esw);
 	return err;
 }
 
@@ -895,50 +976,48 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *
 	if (err)
 		return err;
 
-	mutex_lock(&esw->state_lock);
-	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+	esw_qos_lock(esw);
+	err = esw_qos_vport_enable(vport, 0, 0, extack);
 	if (err)
 		goto unlock;
 
-	err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+	err = esw_qos_set_vport_max_rate(vport, tx_max, extack);
 unlock:
-	mutex_unlock(&esw->state_lock);
+	esw_qos_unlock(esw);
 	return err;
 }
 
 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
 					    u64 tx_share, struct netlink_ext_ack *extack)
 {
-	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
-	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	struct mlx5_esw_rate_group *group = priv;
+	struct mlx5_eswitch *esw = group->esw;
 	int err;
 
-	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
 	if (err)
 		return err;
 
-	mutex_lock(&esw->state_lock);
-	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
-	mutex_unlock(&esw->state_lock);
+	esw_qos_lock(esw);
+	err = esw_qos_set_group_min_rate(group, tx_share, extack);
+	esw_qos_unlock(esw);
 	return err;
 }
 
 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
 					  u64 tx_max, struct netlink_ext_ack *extack)
 {
-	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
-	struct mlx5_eswitch *esw = dev->priv.eswitch;
 	struct mlx5_esw_rate_group *group = priv;
+	struct mlx5_eswitch *esw = group->esw;
 	int err;
 
-	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
 	if (err)
 		return err;
 
-	mutex_lock(&esw->state_lock);
-	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
-	mutex_unlock(&esw->state_lock);
+	esw_qos_lock(esw);
+	err = esw_qos_set_group_max_rate(group, tx_max, extack);
+	esw_qos_unlock(esw);
 	return err;
 }
 
@@ -953,7 +1032,7 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
 	if (IS_ERR(esw))
 		return PTR_ERR(esw);
 
-	mutex_lock(&esw->state_lock);
+	esw_qos_lock(esw);
 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Rate node creation supported only in switchdev mode");
@@ -969,7 +1048,7 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
 	*priv = group;
 unlock:
-	mutex_unlock(&esw->state_lock);
+	esw_qos_unlock(esw);
 	return err;
 }
 
@@ -977,35 +1056,37 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
 				   struct netlink_ext_ack *extack)
 {
 	struct mlx5_esw_rate_group *group = priv;
-	struct mlx5_eswitch *esw;
+	struct mlx5_eswitch *esw = group->esw;
 	int err;
 
-	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
-	if (IS_ERR(esw))
-		return PTR_ERR(esw);
-
-	mutex_lock(&esw->state_lock);
-	err = esw_qos_destroy_rate_group(esw, group, extack);
-	mutex_unlock(&esw->state_lock);
+	esw_qos_lock(esw);
+	err = __esw_qos_destroy_rate_group(group, extack);
+	esw_qos_put(esw);
+	esw_qos_unlock(esw);
 	return err;
 }
 
-int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport,
+int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport,
 				    struct mlx5_esw_rate_group *group,
 				    struct netlink_ext_ack *extack)
 {
+	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
 	int err = 0;
 
-	mutex_lock(&esw->state_lock);
+	if (group && group->esw != esw) {
+		NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	esw_qos_lock(esw);
 	if (!vport->qos.enabled && !group)
 		goto unlock;
 
-	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
+	err = esw_qos_vport_enable(vport, 0, 0, extack);
 	if (!err)
-		err = esw_qos_vport_update_group(esw, vport, group, extack);
+		err = esw_qos_vport_update_group(vport, group, extack);
 unlock:
-	mutex_unlock(&esw->state_lock);
+	esw_qos_unlock(esw);
 	return err;
 }
 
@@ -1018,9 +1099,8 @@ int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
 	struct mlx5_vport *vport = priv;
 
 	if (!parent)
-		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
-						       vport, NULL, extack);
+		return mlx5_esw_qos_vport_update_group(vport, NULL, extack);
 
 	group = parent_priv;
-	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+	return mlx5_esw_qos_vport_update_group(vport, group, extack);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
index 0141e9d52037f..b4045efbaf9e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -6,9 +6,12 @@
 
 #ifdef CONFIG_MLX5_ESWITCH
 
-int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
-				u32 max_rate, u32 min_rate);
-void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw);
+
+int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate);
+bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate);
+void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport);
 
 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
 					    u64 tx_share, struct netlink_ext_ack *extack);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 17f78091ad30e..2bcd42305f46b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -894,7 +894,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
 					      vport_num, 1,
 					      MLX5_VPORT_ADMIN_STATE_DOWN);
 
-	mlx5_esw_qos_vport_disable(esw, vport);
+	mlx5_esw_qos_vport_disable(vport);
 	esw_vport_cleanup_acl(esw, vport);
 }
 
@@ -1481,6 +1481,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 	MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
 	mlx5_eq_notifier_register(esw->dev, &esw->nb);
 
+	err = mlx5_esw_qos_init(esw);
+	if (err)
+		goto err_qos_init;
+
 	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		err = esw_legacy_enable(esw);
 	} else {
@@ -1489,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 	}
 
 	if (err)
-		goto abort;
+		goto err_esw_enable;
 
 	esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
 
@@ -1503,7 +1507,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 
 	return 0;
 
-abort:
+err_esw_enable:
+	mlx5_esw_qos_cleanup(esw);
+err_qos_init:
+	mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
 	mlx5_esw_acls_ns_cleanup(esw);
 	return err;
 }
@@ -1631,6 +1638,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
 
 	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
 		devl_rate_nodes_destroy(devlink);
+	mlx5_esw_qos_cleanup(esw);
 }
 
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
@@ -2060,6 +2068,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 				  u16 vport, struct ifla_vf_info *ivi)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	u32 max_rate, min_rate;
 
 	if (IS_ERR(evport))
 		return PTR_ERR(evport);
@@ -2074,9 +2083,10 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 	ivi->qos = evport->info.qos;
 	ivi->spoofchk = evport->info.spoofchk;
 	ivi->trusted = evport->info.trusted;
-	if (evport->qos.enabled) {
-		ivi->min_tx_rate = evport->qos.min_rate;
-		ivi->max_tx_rate = evport->qos.max_rate;
+
+	if (mlx5_esw_qos_get_vport_rate(evport, &max_rate, &min_rate)) {
+		ivi->max_tx_rate = max_rate;
+		ivi->min_tx_rate = min_rate;
 	}
 	mutex_unlock(&esw->state_lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index f44b4c7ebcfd7..3b901bd36d4b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -212,13 +212,17 @@ struct mlx5_vport {
 
 	struct mlx5_vport_info  info;
 
+	/* Protected with the E-Switch qos domain lock. */
 	struct {
-		bool            enabled;
-		u32             esw_tsar_ix;
-		u32             bw_share;
+		/* Initially false, set to true whenever any QoS features are used. */
+		bool enabled;
+		u32 esw_sched_elem_ix;
 		u32 min_rate;
 		u32 max_rate;
+		/* A computed value indicating relative min_rate between vports in a group. */
+		u32 bw_share;
 		struct mlx5_esw_rate_group *group;
+		struct list_head group_entry;
 	} qos;
 
 	u16 vport;
@@ -333,6 +337,7 @@ enum {
 };
 
 struct dentry;
+struct mlx5_qos_domain;
 
 struct mlx5_eswitch {
 	struct mlx5_core_dev    *dev;
@@ -359,15 +364,17 @@ struct mlx5_eswitch {
 	struct rw_semaphore mode_lock;
 	atomic64_t user_count;
 
+	/* Protected with the E-Switch qos domain lock. */
 	struct {
-		u32             root_tsar_ix;
-		struct mlx5_esw_rate_group *group0;
-		struct list_head groups; /* Protected by esw->state_lock */
-
-		/* Protected by esw->state_lock.
-		 * Initially 0, meaning no QoS users and QoS is disabled.
-		 */
+		/* Initially 0, meaning no QoS users and QoS is disabled. */
 		refcount_t refcnt;
+		u32 root_tsar_ix;
+		struct mlx5_qos_domain *domain;
+		/* Contains all vports with QoS enabled but no explicit group.
+		 * Cannot be NULL if QoS is enabled, but may be a fake group
+		 * referencing the root TSAR if the esw doesn't support groups.
+		 */
+		struct mlx5_esw_rate_group *group0;
 	} qos;
 
 	struct mlx5_esw_bridge_offloads *br_offloads;
@@ -427,8 +434,7 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 				 u16 vport_num, bool setting);
 int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
 				u32 max_rate, u32 min_rate);
-int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport,
+int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport,
 				    struct mlx5_esw_rate_group *group,
 				    struct netlink_ext_ack *extack);
 int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
@@ -806,7 +812,7 @@ int mlx5_esw_offloads_sf_devlink_port_init(struct mlx5_eswitch *esw, struct mlx5
 void mlx5_esw_offloads_sf_devlink_port_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 
 int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
-void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport);
 struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
 
 int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f24f91d213f24..fd34f43d18d58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2617,7 +2617,7 @@ int mlx5_esw_offloads_load_rep(struct mlx5_eswitch *esw, struct mlx5_vport *vpor
 	return err;
 
 load_err:
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport);
+	mlx5_esw_offloads_devlink_port_unregister(vport);
 	return err;
 }
 
@@ -2628,7 +2628,7 @@ void mlx5_esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_vport *v
 
 	mlx5_esw_offloads_rep_unload(esw, vport->vport);
 
-	mlx5_esw_offloads_devlink_port_unregister(esw, vport);
+	mlx5_esw_offloads_devlink_port_unregister(vport);
 }
 
 static int esw_set_slave_root_fdb(struct mlx5_core_dev *master,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 62c770b0eaa83..99de67c3aa743 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -224,6 +224,8 @@ void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change);
 int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy);
+bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy);
 int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 				       void *context, u32 *element_id);
 int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
index db2bd3ad63ba3..6be9981bb6b1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -28,7 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
 {
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 
-	if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP))
+	if (!mlx5_qos_element_type_supported(mdev,
+					     SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP,
+					     SCHEDULING_HIERARCHY_NIC))
 		return -EOPNOTSUPP;
 
 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
@@ -47,8 +49,12 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
 	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 	void *attr;
 
-	if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) ||
-	    !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+	if (!mlx5_qos_element_type_supported(mdev,
+					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
+					     SCHEDULING_HIERARCHY_NIC) ||
+	    !mlx5_qos_tsar_type_supported(mdev,
+					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
+					  SCHEDULING_HIERARCHY_NIC))
 		return -EOPNOTSUPP;
 
 	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 9f8b4005f4bd0..e393391966e0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -34,6 +34,64 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
+bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy)
+{
+	int cap;
+
+	switch (hierarchy) {
+	case SCHEDULING_HIERARCHY_E_SWITCH:
+		cap = MLX5_CAP_QOS(dev, esw_tsar_type);
+		break;
+	case SCHEDULING_HIERARCHY_NIC:
+		cap = MLX5_CAP_QOS(dev, nic_tsar_type);
+		break;
+	default:
+		return false;
+	}
+
+	switch (type) {
+	case TSAR_ELEMENT_TSAR_TYPE_DWRR:
+		return cap & TSAR_TYPE_CAP_MASK_DWRR;
+	case TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN:
+		return cap & TSAR_TYPE_CAP_MASK_ROUND_ROBIN;
+	case TSAR_ELEMENT_TSAR_TYPE_ETS:
+		return cap & TSAR_TYPE_CAP_MASK_ETS;
+	}
+
+	return false;
+}
+
+bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hierarchy)
+{
+	int cap;
+
+	switch (hierarchy) {
+	case SCHEDULING_HIERARCHY_E_SWITCH:
+		cap = MLX5_CAP_QOS(dev, esw_element_type);
+		break;
+	case SCHEDULING_HIERARCHY_NIC:
+		cap = MLX5_CAP_QOS(dev, nic_element_type);
+		break;
+	default:
+		return false;
+	}
+
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		return cap & ELEMENT_TYPE_CAP_MASK_TSAR;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		return cap & ELEMENT_TYPE_CAP_MASK_VPORT;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		return cap & ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		return cap & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP:
+		return cap & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP;
+	}
+
+	return false;
+}
+
 /* Scheduling element fw management */
 int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 				       void *ctx, u32 *element_id)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 96d369112bfa0..c79ba61976732 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4105,11 +4105,47 @@ enum {
 	ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP	= 1 << 4,
 };
 
+enum {
+	TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
+	TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
+	TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+};
+
+enum {
+	TSAR_TYPE_CAP_MASK_DWRR		= 1 << 0,
+	TSAR_TYPE_CAP_MASK_ROUND_ROBIN	= 1 << 1,
+	TSAR_TYPE_CAP_MASK_ETS		= 1 << 2,
+};
+
+struct mlx5_ifc_tsar_element_bits {
+	u8 reserved_at_0[0x8];
+	u8 tsar_type[0x8];
+	u8 reserved_at_10[0x10];
+};
+
+struct mlx5_ifc_vport_element_bits {
+	u8 reserved_at_0[0x10];
+	u8 vport_number[0x10];
+};
+
+struct mlx5_ifc_vport_tc_element_bits {
+	u8 traffic_class[0x4];
+	u8 reserved_at_4[0xc];
+	u8 vport_number[0x10];
+};
+
+union mlx5_ifc_element_attributes_bits {
+	struct mlx5_ifc_tsar_element_bits tsar;
+	struct mlx5_ifc_vport_element_bits vport;
+	struct mlx5_ifc_vport_tc_element_bits vport_tc;
+	u8 reserved_at_0[0x20];
+};
+
 struct mlx5_ifc_scheduling_context_bits {
 	u8         element_type[0x8];
 	u8         reserved_at_8[0x18];
 
-	u8         element_attributes[0x20];
+	union mlx5_ifc_element_attributes_bits element_attributes;
 
 	u8         parent_element_id[0x20];
 
@@ -4798,35 +4834,6 @@ struct mlx5_ifc_register_loopback_control_bits {
 	u8         reserved_at_20[0x60];
 };
 
-struct mlx5_ifc_vport_tc_element_bits {
-	u8         traffic_class[0x4];
-	u8         reserved_at_4[0xc];
-	u8         vport_number[0x10];
-};
-
-struct mlx5_ifc_vport_element_bits {
-	u8         reserved_at_0[0x10];
-	u8         vport_number[0x10];
-};
-
-enum {
-	TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
-	TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
-	TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
-};
-
-enum {
-	TSAR_TYPE_CAP_MASK_DWRR		= 1 << 0,
-	TSAR_TYPE_CAP_MASK_ROUND_ROBIN	= 1 << 1,
-	TSAR_TYPE_CAP_MASK_ETS		= 1 << 2,
-};
-
-struct mlx5_ifc_tsar_element_bits {
-	u8         reserved_at_0[0x8];
-	u8         tsar_type[0x8];
-	u8         reserved_at_10[0x10];
-};
-
 enum {
 	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_SUCCESS = 0x0,
 	MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL = 0x1,
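
Note on the new calling convention (a hedged sketch, not part of the patch): after this change the vport QoS entry points take only the vport and serialize internally on the qos domain lock, so callers such as mlx5_eswitch_set_vport_rate() in legacy.c no longer wrap them in esw->state_lock. A minimal hypothetical caller mirroring the legacy.c hunk above; example_set_vf_rate is an illustrative name, not a driver function:

static int example_set_vf_rate(struct mlx5_eswitch *esw, u16 vport_num,
			       u32 max_rate, u32 min_rate)
{
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);

	if (IS_ERR(vport))
		return PTR_ERR(vport);

	/* No mutex_lock(&esw->state_lock) around the call anymore: the QoS
	 * code takes esw->qos.domain->lock internally via esw_qos_lock().
	 */
	return mlx5_esw_qos_set_vport_rate(vport, max_rate, min_rate);
}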
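For readers following the normalization hunks: the min_rate-to-bw_share mapping itself is unchanged by the refactor, only restructured (the open-coded MLX5_RATE_TO_BW_SHARE macro moves into esw_qos_calc_bw_share(), fed by the per-group or per-esw divider). Below is a standalone userspace illustration of that math, with kernel types replaced by plain C and fw_max standing in for MLX5_CAP_QOS(dev, max_tsar_bw_share); the values and harness are hypothetical, not driver code:

#include <stdio.h>
#include <stdint.h>

#define MLX5_MIN_BW_SHARE 1
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* Mirrors esw_qos_calculate_group_min_rate_divider(): the largest
 * guarantee maps to fw_max; a divider of 0 disables min guarantees.
 */
static uint32_t calc_divider(const uint32_t *min_rates, int n, uint32_t fw_max)
{
	uint32_t max_guarantee = 0;

	for (int i = 0; i < n; i++)
		if (min_rates[i] > max_guarantee)
			max_guarantee = min_rates[i];
	if (max_guarantee)
		return max_guarantee / fw_max ? max_guarantee / fw_max : 1;
	return 0;
}

/* Mirrors esw_qos_calc_bw_share(): round up, clamp to [1, fw_max]. */
static uint32_t calc_bw_share(uint32_t min_rate, uint32_t divider, uint32_t fw_max)
{
	uint32_t share;

	if (!divider)
		return 0;
	share = DIV_ROUND_UP(min_rate, divider);
	if (share < MLX5_MIN_BW_SHARE)
		share = MLX5_MIN_BW_SHARE;
	return share > fw_max ? fw_max : share;
}

int main(void)
{
	uint32_t min_rates[] = { 100000, 25000, 0 }; /* example guarantees */
	uint32_t fw_max = 100;                       /* example cap value */
	uint32_t div = calc_divider(min_rates, 3, fw_max);

	for (int i = 0; i < 3; i++)
		printf("min_rate=%u -> bw_share=%u\n", min_rates[i],
		       calc_bw_share(min_rates[i], div, fw_max));
	return 0;
}

With these inputs the vport with the largest guarantee lands exactly on fw_max (100), the 25000 guarantee gets a proportional share (25), and a vport with no guarantee is clamped up to the minimal share of 1, which is the "default bw_share to enable them with QOS" behavior the driver comments describe.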
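On the mlx5_ifc.h change: turning element_attributes into a typed union is what lets the driver replace the open-coded "*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);" with "MLX5_SET(tsar_element, attr, tsar_type, ...)". The two are layout-equivalent because ifc fields are declared MSB-first: reserved_at_0[0x8] occupies the first byte of the big-endian dword and tsar_type[0x8] the second, which is exactly where a host value shifted left by 16 lands after byte-swapping. A tiny hypothetical userspace check of that equivalence (htonl plays the role of cpu_to_be32 on a little-endian host):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	/* Old-style encoding: type value 0x2 (ETS) shifted into bits 16-23. */
	uint32_t dword = htonl((uint32_t)0x2 << 16);
	uint8_t *bytes = (uint8_t *)&dword;

	/* Byte 0 is reserved_at_0[0x8], byte 1 is tsar_type[0x8]. */
	printf("byte0=%#x byte1=%#x\n", bytes[0], bytes[1]);
	return 0;
}

This prints byte0=0 byte1=0x2, matching the tsar_type offset in mlx5_ifc_tsar_element_bits, so the MLX5_SET form expresses the same wire layout declaratively instead of via a manual shift.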