From 18a92b05425493c3d131c47689443d7ae860c986 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 8 Mar 2023 00:02:12 +0200 Subject: [PATCH 01/15] net/mlx5: Simplify unload all rep code Instead of using type specific iterators which are only used in one place just traverse the xarray. It will provide suitable ordering based on the vport numbers. This will also eliminate the need for changes here when new types are added. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 48 +------------------ 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index eafb098db6b02..6259824545757 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -55,13 +55,6 @@ #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) -#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ - xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) - -#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ - mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ - rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. 
*/ @@ -2191,18 +2184,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return 0; } -static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, - xa_mark_t mark) -{ - bool mark_set; - - /* Copy the mark from vport to its rep */ - mark_set = xa_get_mark(&esw->vports, rep->vport, mark); - if (mark_set) - xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); -} - static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) { struct mlx5_eswitch_rep *rep; @@ -2222,9 +2203,6 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx if (err) goto insert_err; - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); return 0; insert_err: @@ -2365,37 +2343,13 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - unsigned long i; - - mlx5_esw_for_each_sf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); -} - static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; unsigned long i; - __unload_reps_sf_vport(esw, rep_type); - - mlx5_esw_for_each_vf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + mlx5_esw_for_each_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); } int mlx5_esw_offloads_rep_load(struct mlx5_eswitch 
*esw, u16 vport_num) From 93b36d0f2892357906f1058778c9188ff857baa1 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 00:27:21 +0200 Subject: [PATCH 02/15] net/mlx5: mlx5_ifc updates for embedded CPU SRIOV Add ec_vf_vport_base to HCA Capabilities 2. This indicates the base vport of embedded CPU virtual functions that are connected to the eswitch. Add ec_vf_function to query/set_hca_caps. If set this indicates accessing a virtual function on the embedded CPU by function ID. This should only be used with other_function set to 1. Signed-off-by: Daniel Jurgens Reviewed-by: Bodong Wang Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index af3a92ad2e6b1..1f4f62cb9f344 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1992,7 +1992,10 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 ts_cqe_metadata_size2wqe_counter[0x5]; u8 reserved_at_250[0x10]; - u8 reserved_at_260[0x5a0]; + u8 reserved_at_260[0x120]; + u8 reserved_at_380[0x10]; + u8 ec_vf_vport_base[0x10]; + u8 reserved_at_3a0[0x460]; }; enum mlx5_ifc_flow_destination_type { @@ -4805,7 +4808,8 @@ struct mlx5_ifc_set_hca_cap_in_bits { u8 op_mod[0x10]; u8 other_function[0x1]; - u8 reserved_at_41[0xf]; + u8 ec_vf_function[0x1]; + u8 reserved_at_42[0xe]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; @@ -5956,7 +5960,8 @@ struct mlx5_ifc_query_hca_cap_in_bits { u8 op_mod[0x10]; u8 other_function[0x1]; - u8 reserved_at_41[0xf]; + u8 ec_vf_function[0x1]; + u8 reserved_at_42[0xe]; u8 function_id[0x10]; u8 reserved_at_60[0x20]; From dc13180824b78e1e4e7ae1ce22160ae8e5fb858e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:36:14 +0200 Subject: [PATCH 03/15] net/mlx5: Enable devlink port for embedded cpu VF vports Enable creation of a devlink port for EC VF vports. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/devlink_port.c | 8 +++++++- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 20 +++++++++++++++++++ include/linux/mlx5/driver.h | 6 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index f370f67d9e331..af779c700278e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -18,7 +18,8 @@ static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_ { return vport_num == MLX5_VPORT_UPLINK || (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_core_is_ec_vf_vport(esw->dev, vport_num); } static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) @@ -56,6 +57,11 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); + } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, false); } return dl_port; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa4..0e7b5c6e4020f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -343,4 +343,24 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); bool mlx5_same_hw_devs(struct 
mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); +static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_2(dev, ec_vf_vport_base); +} + +static inline u16 mlx5_core_ec_sriov_enabled(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf(dev) && mlx5_core_ec_vf_vport_base(dev); +} + +static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 vport_num) +{ + int base_vport = mlx5_core_ec_vf_vport_base(dev); + int max_vport = base_vport + mlx5_core_max_ec_vfs(dev); + + if (!mlx5_core_ec_sriov_enabled(dev)) + return false; + + return (vport_num >= base_vport && vport_num < max_vport); +} #endif /* __MLX5_CORE_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a744c48eec2a..252b6a6965b82 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -474,6 +474,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; u16 max_vfs; + u16 max_ec_vfs; }; struct mlx5_fc_pool { @@ -1244,6 +1245,11 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline u16 mlx5_core_max_ec_vfs(const struct mlx5_core_dev *dev) +{ + return dev->priv.sriov.max_ec_vfs; +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { From 9ac0b128248e19d06475f4592fe87f6ce18bc554 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:51:22 +0200 Subject: [PATCH 04/15] net/mlx5: Update vport caps query/set for EC VFs These functions are for query/set by vport, there was an underlying assumption that vport was equal to function ID. That's not the case for EC VF functions. Set the ec_vf_function bit accordingly. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 6 +++--- .../net/ethernet/mellanox/mlx5/core/vport.c | 19 +++++++++++++++---- include/linux/mlx5/vport.h | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0e7b5c6e4020f..7ca0c7a547aa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -325,10 +325,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); -int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); -#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \ - mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL) +#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ + mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bc66b078a8a1c..6d3984dd5b21e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1161,23 +1161,32 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +static int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, 
bool ec_vf_func) +{ + return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + : vport; +} + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, - u16 function_id, u16 opmod) + u16 vport, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *set_hca_cap; void *set_ctx; @@ -1191,8 +1200,10 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, + mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 7f31432f44c2c..fbb9bf4478894 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,6 +132,6 @@ int 
mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); #endif /* __MLX5_VPORT_H__ */ From a7719b29a82199b90ebbf355d3332e0fbfbf6045 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 21:24:55 +0200 Subject: [PATCH 05/15] net/mlx5: Add management of EC VF vports Add init, load, unload, and cleanup of the EC VF vports. This includes changes in how eswitch SRIOV is managed. Previously, on an embedded CPU platform, the number of VFs provided when enabling the eswitch was always 0; host VF vports are handled in the eswitch functions change event handler. Now track the number of EC VFs as well, so they can be handled properly in the enable/disable flows. There are only 3 marks available for use in xarrays, and all 3 were already in use for this use case. EC VF vports are in a known range, so we can access them by index instead of marks. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 125 +++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 13 ++ .../mellanox/mlx5/core/eswitch_offloads.c | 22 +++ 3 files changed, 143 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ecd8864d5d11c..b33d852aae346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1051,6 +1051,18 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } } +static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + /* Public E-Switch API */ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) @@ -1090,6 +1102,19 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) } } +static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, + u16 num_ec_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_vport(esw, vport->vport); + } +} + int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { @@ -1110,6 +1135,26 @@ int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, return err; } +static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int 
err; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_ec_vf_vports(esw, num_ec_vfs); + return err; +} + static int host_pf_enable_hca(struct mlx5_core_dev *dev) { if (!mlx5_core_is_ecpf(dev)) @@ -1154,6 +1199,12 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; + } } /* Enable VF vports */ @@ -1164,6 +1215,9 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, return 0; vf_err: + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); +ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: @@ -1180,8 +1234,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (mlx5_ecpf_vport_exists(esw->dev)) + if (mlx5_ecpf_vport_exists(esw->dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + } host_pf_disable_hca(esw->dev); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); @@ -1225,6 +1282,9 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + esw->esw_funcs.num_ec_vfs = num_vfs; + kvfree(out); } @@ -1332,9 +1392,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_event_handlers_register(esw); - esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active 
vports(%d)\n", + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); mlx5_esw_mode_change_notify(esw, esw->mode); @@ -1356,7 +1416,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { bool toggle_lag; - int ret; + int ret = 0; if (!mlx5_esw_allowed(esw)) return 0; @@ -1376,10 +1436,21 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; - ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); - if (!ret) - esw->esw_funcs.num_vfs = num_vfs; + /* If this is the ECPF the number of host VFs is managed via the + * eswitch function change event handler, and any num_vfs provided + * here are intended to be EC VFs. + */ + if (!mlx5_core_is_ecpf(esw->dev)) { + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_ec_vfs = num_vfs; + } } + up_write(&esw->mode_lock); if (toggle_lag) @@ -1399,16 +1470,22 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ - if (!esw->esw_funcs.num_vfs && !clear_vf) + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) goto unlock; - esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); - - mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (!mlx5_core_is_ecpf(esw->dev)) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); + if (clear_vf) + mlx5_eswitch_clear_ec_vf_vports_info(esw); + } if (esw->mode == MLX5_ESWITCH_OFFLOADS) { struct devlink *devlink = priv_to_devlink(esw->dev); @@ -1419,7 +1496,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) if (esw->mode == MLX5_ESWITCH_LEGACY) mlx5_eswitch_disable_locked(esw); - esw->esw_funcs.num_vfs = 0; + if (!mlx5_core_is_ecpf(esw->dev)) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; unlock: up_write(&esw->mode_lock); @@ -1439,9 +1519,9 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) mlx5_eswitch_event_handlers_unregister(esw); - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; @@ -1601,6 +1681,17 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + int ec_vf_base_num = mlx5_core_ec_vf_vport_base(dev); + + for (i = 0; i < mlx5_core_max_ec_vfs(esw->dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, ec_vf_base_num + i); + if (err) + goto err; + idx++; + } + } + if (mlx5_ecpf_vport_exists(dev) || mlx5_core_is_ecpf_esw_manager(dev)) { err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d3608f198e0af..266b60fefe257 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -289,6 +289,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; + u16 num_ec_vfs; }; enum { @@ -654,6 +655,18 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) +/* This macro should only be used if EC SRIOV is enabled. + * + * Because there were no more marks available on the xarray this uses a + * for_each_range approach. 
The range is only valid when EC SRIOV is enabled + */ +#define mlx5_esw_for_each_ec_vf_vport(esw, index, vport, last) \ + xa_for_each_range(&((esw)->vports), \ + index, \ + vport, \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + (last) - 1) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 6259824545757..68798aed792f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3287,6 +3287,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; /* Uplink vport rep must load first. 
*/ err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); @@ -3524,8 +3527,27 @@ static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, goto revert_inline_mode; } } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_ec_vf_inline_mode; + } + } + } return 0; +revert_ec_vf_inline_mode: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } revert_inline_mode: mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { if (vport->vport == err_vport_num) From fa3c73eee641cf76bc232373303aa51a1cad8b8e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 21:36:39 +0200 Subject: [PATCH 06/15] net/mlx5: Add/remove peer miss rules for EC VFs Add and remove the peer miss rules for EC VFs. It's possible that there are different amounts of total VFs per function so only create rules for the minimum number of max VFs. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 68798aed792f1..fdf482f6fb34c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1125,11 +1125,32 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (i >= mlx5_core_max_ec_vfs(peer_dev)) + break; + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } + flows[vport->index] = flow; + } + } esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; kvfree(spec); return 0; +add_ec_vf_flow_err: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } add_vf_flow_err: mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { if (!flows[vport->index]) @@ -1162,6 +1183,17 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)]; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + /* The flow for a particular vport could be NULL if the other ECPF + * has fewer or no VFs enabled + */ + if (!flows[vport->index]) + continue; + 
mlx5_del_flow_rules(flows[vport->index]); + } + } + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[vport->index]); From 395ccd6eb49a12b021ac5deaa56e6b0b8f93241b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 00:53:21 +0200 Subject: [PATCH 07/15] net/mlx5: Add new page type for EC VF pages When the embedded cpu supports SRIOV it can be enabled and disabled independently from the host SRIOV. Track the pages separately so we can properly wait for returned VF pages. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 11 ++++++++++- include/linux/mlx5/driver.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index bb95b40d25eb5..fc13b41cc9b28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_ec_vfs", 0400, pages, &dev->priv.page_counters[MLX5_EC_VF]); debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 95dc67fb30015..dcf58efac159c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -79,7 +79,13 @@ static 
u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct if (!func_id) return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; - return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; + if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { + if (ec_function) + return MLX5_EC_VF; + else + return MLX5_VF; + } + return MLX5_SF; } static u32 mlx5_get_ec_function(u32 function) @@ -730,6 +736,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", dev->priv.page_counters[MLX5_HOST_PF]); + WARN(dev->priv.page_counters[MLX5_EC_VF], + "EC VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_EC_VF]); return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 252b6a6965b82..18a608a1f567b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -581,6 +581,7 @@ enum mlx5_func_type { MLX5_VF, MLX5_SF, MLX5_HOST_PF, + MLX5_EC_VF, MLX5_FUNC_TYPE_NUM, }; From 2ee3db806e851b9f3bfc46a1004a1ccee180b0a8 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:06:58 +0200 Subject: [PATCH 08/15] net/mlx5: Use correct vport when restoring GUIDs Prior to enabling EC VF functionality the vport number and function ID were always the same. That's not the case now. Use the correct vport number to modify the HCA vport context. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f07d009291621..c2463a1d7035b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,7 +37,7 @@ #include "mlx5_irq.h" #include "eswitch.h" -static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf, u16 func_id) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct mlx5_hca_vport_context *in; @@ -59,7 +59,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; - err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, func_id, in); if (err) mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); @@ -73,6 +73,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err, vf, num_msix_count; + int vport_num; err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { @@ -104,7 +105,10 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { - err = sriov_restore_guids(dev, vf); + vport_num = mlx5_core_ec_sriov_enabled(dev) ? 
+ mlx5_core_ec_vf_vport_base(dev) + vf + : vf + 1; + err = sriov_restore_guids(dev, vf, vport_num); if (err) { mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", From 42a84a430931afe2ccf31a6910dec86e87de5d2a Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:13:43 +0200 Subject: [PATCH 09/15] net/mlx5: Query correct caps for min msix vectors The VFs on the host and the embedded CPU platform share function numbers. Set the ec_vf_function field to query the caps for the correct function. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 843da89a90350..b2dbae763ca6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -41,6 +41,15 @@ struct mlx5_irq_table { struct mlx5_irq_pool *sf_comp_pool; }; +static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, + int func, + bool ec_vf_func) +{ + if (!ec_vf_func) + return func; + return mlx5_core_ec_vf_vport_base(dev) + func - 1; +} + /** * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors * to be ssigned to each VF. 
@@ -79,6 +88,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; + bool ec_vf_function; + int vport; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -104,7 +115,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, goto out; } - ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap); + ec_vf_function = mlx5_core_ec_sriov_enabled(dev); + vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function); + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap); if (ret) goto out; @@ -115,6 +128,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function); MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); MLX5_SET(set_hca_cap_in, hca_cap, op_mod, From 6d98f314bfca10cebf66e42573c4b362ed2ee17c Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 18:52:29 +0200 Subject: [PATCH 10/15] net/mlx5: Update SRIOV enable/disable to handle EC/VFs Previously on the embedded CPU platform SRIOV was never enabled/disabled via mlx5_core_sriov_configure. Host VF updates are provided by an event handler. Now in the disable flow it must be known if this is a disable due to driver unload or SRIOV detach, or if the user updated the number of VFs. If due to change in the number of VFs only wait for the pages of ECVFs. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/main.c | 2 +- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- .../net/ethernet/mellanox/mlx5/core/sriov.c | 35 +++++++++++++++---- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6ee016deae17..fed8b48a5b20d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,7 +1809,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); devlink_unregister(devlink); - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, false); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_uninit_one(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7ca0c7a547aa6..7a5f040820584 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -195,7 +195,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -void mlx5_sriov_disable(struct pci_dev *pdev); +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index c2463a1d7035b..b73583b0a0fed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -123,9 +123,11 @@ static int 
mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } static void -mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + bool wait_for_ec_vf_pages = true; + bool wait_for_vf_pages = true; int err; int vf; @@ -147,11 +149,30 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + /* There are a number of scenarios when SRIOV is being disabled: + * 1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true). + * - If EC SRIOV is enabled then this flow is happening on the + * embedded platform, wait for only EC VF pages. + * - If EC SRIOV is not enabled this flow is happening on non-embedded + * platform, wait for the VF pages. + * + * 2. The driver is being unloaded. In this case wait for all pages. 
+ */ + if (num_vf_change) { + if (mlx5_core_ec_sriov_enabled(dev)) + wait_for_vf_pages = false; + else + wait_for_ec_vf_pages = false; + } + + if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF])) + mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n"); + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ if (mlx5_core_is_ecpf(dev)) return; - if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -172,12 +193,12 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, true); } return err; } -void mlx5_sriov_disable(struct pci_dev *pdev) +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); @@ -185,7 +206,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev) pci_disable_sriov(pdev); devl_lock(devlink); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change); devl_unlock(devlink); } @@ -200,7 +221,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, true); if (!err) sriov->num_vfs = num_vfs; @@ -245,7 +266,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) From 7057fe561988effa0b044b99262bb3712a5892c0 Mon Sep 17 00:00:00 
2001 From: Daniel Jurgens Date: Wed, 15 Mar 2023 17:29:13 +0200 Subject: [PATCH 11/15] net/mlx5: Set max number of embedded CPU VFs Set the maximum number of embedded cpu VF functions available. Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index b73583b0a0fed..4e42a3b9b8eec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -305,6 +305,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) total_vfs = pci_sriov_get_totalvfs(pdev); sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); + sriov->max_ec_vfs = mlx5_core_ec_sriov_enabled(dev) ? pci_sriov_get_totalvfs(dev->pdev) : 0; sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; From 2059cf51f318681a4cdd3eb1a01a2d62b6a9c442 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 3 May 2023 12:08:48 +0300 Subject: [PATCH 12/15] net/mlx5: Split function_setup() to enable and open functions mlx5_cmd_init_hca() takes ~0.2 seconds. A user who wants to disable some of the SF aux devices at large scale (1K SFs, for example) will waste more than 3 minutes on mlx5_cmd_init_hca(), which isn't needed at this stage. A downstream patch will change SFs which are probed over the E-switch (local SFs) to be probed without any aux dev. In order to support this, split function_setup() to avoid executing mlx5_cmd_init_hca().
Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/main.c | 83 +++++++++++++------ 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fed8b48a5b20d..0faae77d84e6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1118,7 +1118,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -1183,28 +1183,56 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto reclaim_boot_pages; } + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return err; +} + +static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) +{ + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); +} + +static int mlx5_function_open(struct mlx5_core_dev *dev) +{ + int err; + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); - goto reclaim_boot_pages; + return err; } err = set_hca_cap(dev); if (err) { mlx5_core_err(dev, "set_hca_cap failed\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { mlx5_core_err(dev, "failed to allocate init pages\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_cmd_init_hca(dev, sw_owner_id); if (err) { mlx5_core_err(dev, 
"init hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_set_driver_version(dev); @@ -1212,26 +1240,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_start_health_fw_log_up(dev); - return 0; - -reclaim_boot_pages: - mlx5_reclaim_startup_pages(dev); -err_disable_hca: - mlx5_core_disable_hca(dev, 0); -stop_health_poll: - mlx5_stop_health_poll(dev, boot); -err_cmd_cleanup: - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); - - return err; } -static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_close(struct mlx5_core_dev *dev) { int err; @@ -1240,15 +1255,33 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); return err; } - mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); - mlx5_stop_health_poll(dev, boot); - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); return 0; } +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + err = mlx5_function_enable(dev, boot, timeout); + if (err) + return err; + + err = mlx5_function_open(dev); + if (err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err = mlx5_function_close(dev); + + if (!err) + mlx5_function_disable(dev, boot); + return err; +} + static int mlx5_load(struct mlx5_core_dev *dev) { int err; From 3f90840305e2b240749aec7dde23f5262e513641 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 17 May 2023 17:39:54 +0300 Subject: [PATCH 13/15] net/mlx5: Move esw multiport devlink param to eswitch code Move the param registration and handling code into the eswitch code as they are related to each other. 
No point in having the devlink param registration done in separate file. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/devlink.c | 34 ------------- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 49 ++++++++++++++++++- 2 files changed, 47 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 63635cc444790..27197acdb4d86 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,7 +7,6 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" -#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -427,33 +426,6 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } - -static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - if (ctx->val.vbool) - return mlx5_lag_mpesw_enable(dev); - - mlx5_lag_mpesw_disable(dev); - return 0; -} - -static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - ctx->val.vbool = mlx5_lag_is_mpesw(dev); - return 0; -} #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -527,12 +499,6 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, - "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_esw_multiport_get, - mlx5_devlink_esw_multiport_set, - NULL), 
#endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b33d852aae346..2af9c4646bc7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -41,6 +41,7 @@ #include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" +#include "lag/lag.h" #include "eswitch.h" #include "fs_core.h" #include "devlink.h" @@ -1709,6 +1710,38 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) return err; } +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static const struct devlink_param mlx5_eswitch_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, NULL), +}; + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -1717,9 +1750,16 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + if (err) + return err; + esw = kzalloc(sizeof(*esw), GFP_KERNEL); - if (!esw) - return -ENOMEM; + if (!esw) { + err = -ENOMEM; + goto unregister_param; + } 
esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); @@ -1779,6 +1819,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (esw->work_queue) destroy_workqueue(esw->work_queue); kfree(esw); +unregister_param: + devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); return err; } @@ -1802,6 +1845,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); kfree(esw); + devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); } /* Vport Administration */ From e71383fb9cd15a28d6c01d2c165a96f1c0bcf418 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 3 May 2023 14:18:23 +0300 Subject: [PATCH 14/15] net/mlx5: Light probe local SFs If a user wants to configure an SF, for example to use only vdpa functionality, they need to fully probe the SF, configure it, and afterward reload the SF. In order to save the time of the reload, local SFs will probe without any auxiliary sub-device, so that each SF can be configured prior to its full probe. The defaults of the enable_* devlink params of these SFs are set to false. Usage example: Create SF: $ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11 $ devlink port function set pci/0000:08:00.0/32768 \ hw_addr 00:00:00:00:00:11 state active Enable ETH auxiliary device: $ devlink dev param set auxiliary/mlx5_core.sf.1 \ name enable_eth value true cmode driverinit Now, in order to fully probe the SF, use devlink reload: $ devlink dev reload auxiliary/mlx5_core.sf.1 At this point the user has an SF devlink instance with an auxiliary device for the Ethernet functionality only.
Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/switchdev.rst | 20 +++ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 16 +++ .../net/ethernet/mellanox/mlx5/core/devlink.c | 20 ++- .../net/ethernet/mellanox/mlx5/core/health.c | 24 ++-- .../net/ethernet/mellanox/mlx5/core/main.c | 124 ++++++++++++++++-- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 7 + .../mellanox/mlx5/core/sf/dev/driver.c | 15 ++- 7 files changed, 203 insertions(+), 23 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst index 01deedb715975..db62187eebcec 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst @@ -45,6 +45,26 @@ Following bridge VLAN functions are supported by mlx5: Subfunction =========== +Subfunctions which are spawned over the E-switch are created only with a devlink +device, and by default all the SF auxiliary devices are disabled. +This allows the user to configure the SF before the SF has been fully probed, +which saves time. + +Usage example: +Create SF: +$ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11 +$ devlink port function set pci/0000:08:00.0/32768 \ + hw_addr 00:00:00:00:00:11 state active + +Enable ETH auxiliary device: +$ devlink dev param set auxiliary/mlx5_core.sf.1 \ + name enable_eth value true cmode driverinit + +Now, in order to fully probe the SF, use devlink reload: +$ devlink dev reload auxiliary/mlx5_core.sf.1 + +mlx5 supports ETH, rdma and vdpa (vnet) auxiliary device devlink params (see :ref:`Documentation/networking/devlink/devlink-params.rst`) + mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface.
A subfunction has its own function capabilities and its own resources. This diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 1b33533b15dea..617ac7e5d75cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -323,6 +323,18 @@ static void del_adev(struct auxiliary_device *adev) auxiliary_device_uninit(adev); } +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev) +{ + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev) +{ + return dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; +} + int mlx5_attach_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -457,6 +469,10 @@ static int add_drivers(struct mlx5_core_dev *dev) if (priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled && + !(mlx5_adev_devices[i].is_enabled(dev))) + continue; + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 27197acdb4d86..3d82ec8906660 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -141,6 +141,13 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, bool sf_dev_allocated; int ret = 0; + if (mlx5_dev_is_lightweight(dev)) { + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + mlx5_unload_one_light(dev); + return 0; + } + sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { /* Reload results in deleting SF device which further results in @@ -193,6 +200,10 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case 
DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (mlx5_dev_is_lightweight(dev)) { + mlx5_fw_reporters_create(dev); + return mlx5_init_one_devl_locked(dev); + } ret = mlx5_load_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: @@ -511,7 +522,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; - value.vbool = MLX5_CAP_GEN(dev, roce); + value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); @@ -561,7 +572,7 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); @@ -601,6 +612,7 @@ static const struct devlink_param mlx5_devlink_rdma_params[] = { static int mlx5_devlink_rdma_params_register(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; @@ -612,7 +624,7 @@ static int mlx5_devlink_rdma_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, value); @@ -647,7 +659,7 @@ static int mlx5_devlink_vnet_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, value); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 871c32dda66ec..210100a4064a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -719,7 +719,7 @@ static const struct 
devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD -static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); @@ -735,17 +735,17 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) } health->fw_reporter = - devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, - 0, dev); + devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); health->fw_fatal_reporter = - devlink_health_reporter_create(devlink, - &mlx5_fw_fatal_reporter_ops, - grace_period, - dev); + devl_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); @@ -777,7 +777,8 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - queue_work(health->wq, &health->fatal_report_work); + if (!mlx5_dev_is_lightweight(dev)) + queue_work(health->wq, &health->fatal_report_work); } #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) @@ -905,10 +906,15 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) int mlx5_health_init(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_core_health *health; char *name; - mlx5_fw_reporters_create(dev); + if (!mlx5_dev_is_lightweight(dev)) { + devl_lock(devlink); + mlx5_fw_reporters_create(dev); + devl_unlock(devlink); + } mlx5_reporter_vnic_create(dev); health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c 
b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0faae77d84e6e..6fa314f8e5ee5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1424,12 +1424,11 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -int mlx5_init_one(struct mlx5_core_dev *dev) +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); + bool light_probe = mlx5_dev_is_lightweight(dev); int err = 0; - devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; @@ -1443,9 +1442,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto function_teardown; } - err = mlx5_devlink_params_register(priv_to_devlink(dev)); - if (err) - goto err_devlink_params_reg; + /* In case of light_probe, mlx5_devlink is already registered. + * Hence, don't register devlink again. + */ + if (!light_probe) { + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + } err = mlx5_load(dev); if (err) @@ -1458,14 +1462,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); - devl_unlock(devlink); return 0; err_register: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: - mlx5_devlink_params_unregister(priv_to_devlink(dev)); + if (!light_probe) + mlx5_devlink_params_unregister(priv_to_devlink(dev)); err_devlink_params_reg: mlx5_cleanup_once(dev); function_teardown: @@ -1473,6 +1477,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev) err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_init_one_devl_locked(dev); devl_unlock(devlink); return err; } @@ -1590,6 +1604,100 @@ void mlx5_unload_one(struct 
mlx5_core_dev *dev, bool suspend) devl_unlock(devlink); } +/* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps. + * A full query of hca_caps will be done when the device will reload. + */ +static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + + return 0; +} + +int mlx5_init_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) { + mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); + goto out; + } + + err = mlx5_query_hca_caps_light(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); + goto query_hca_caps_err; + } + + devl_lock(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); + goto query_hca_caps_err; + } + + return 0; + +query_hca_caps_err: + mlx5_function_disable(dev, true); +out: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + return err; +} + +void mlx5_uninit_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unlock(devlink); + if (dev->state != MLX5_DEVICE_STATE_UP) + 
return; + mlx5_function_disable(dev, true); +} + +/* xxx_light() function are used in order to configure the device without full + * init (light init). e.g.: There isn't a point in reload a device to light state. + * Hence, mlx5_load_one_light() isn't needed. + */ + +void mlx5_unload_one_light(struct mlx5_core_dev *dev) +{ + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, false); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7a5f040820584..464c6885a01c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -240,11 +240,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); @@ -319,11 +322,15 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); 
void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); +int mlx5_init_one_light(struct mlx5_core_dev *dev); +void mlx5_uninit_one_light(struct mlx5_core_dev *dev); +void mlx5_unload_one_light(struct mlx5_core_dev *dev); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 0692363cf80e4..8fe82f1191bb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -3,6 +3,7 @@ #include #include +#include #include "mlx5_core.h" #include "dev.h" #include "devlink.h" @@ -28,6 +29,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; + /* Only local SFs do light probe */ + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + mlx5_dev_set_lightweight(mdev); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); @@ -41,7 +46,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } - err = mlx5_init_one(mdev); + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + err = mlx5_init_one_light(mdev); + else + err = mlx5_init_one(mdev); if (err) { mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; @@ -65,7 +73,10 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); - mlx5_uninit_one(sf_dev->mdev); + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); 
iounmap(sf_dev->mdev->iseg); mlx5_mdev_uninit(sf_dev->mdev); mlx5_devlink_free(devlink); From 978015f7ef9240acfb078f4c1c0d79459b42f951 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 May 2023 10:34:59 +0200 Subject: [PATCH 15/15] net/mlx5e: Remove a useless function call 'handle' is known to be NULL here. There is no need to kfree() it. Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 0290e0dea5390..4e923a2874aef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -112,10 +112,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) { - kfree(handle); + if (!handle) return ERR_PTR(-ENOMEM); - } post_attr->chain = 0; post_attr->prio = 0;