Skip to content

Commit

Permalink
Merge tag 'mlx5-updates-2022-01-06' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2022-01-06

1) Expose FEC per lane block counters via ethtool

2) Trivial fixes/updates/cleanup to mlx5e netdev driver

3) Fix htmldoc build warning

4) Spread mlx5 SFs (sub-functions) to all available CPU cores: Commits 1..5

Shay Drory Says:
================
Before this patchset, mlx5 subfunctions shared the same IRQs (MSI-X) with
their peer subfunctions, causing them to use the same CPU cores.

In large scale, this is very undesirable, SFs use small number of cpu
cores and all of them will be packed on the same CPU cores, not
utilizing all CPU cores in the system.

In this patchset we want to achieve two things.
 a) Spread IRQs used by SFs to all cpu cores
 b) Pack fewer SFs in the same IRQ, which will result in multiple IRQs per core.

In this patchset, we spread SFs over all online cpus available to mlx5
irqs in Round-Robin manner. e.g.: Whenever a SF is created, pick the next
CPU core with least number of SF IRQs bound to it, SFs will share IRQs on
the same core until a certain limit, when such limit is reached, we
request a new IRQ and add it to that CPU core IRQ pool, when out of IRQs,
pick any IRQ with least number of SF users.

This enhancement is done in order to achieve a better distribution of
the SFs over all the available CPUs, which reduces application latency,
as shown below.

Machine details:
Intel(R) Xeon(R) CPU E5-2697 v3 @ 2.60GHz with 56 cores.
PCI Express 3 with BW of 126 Gb/s.
ConnectX-5 Ex; EDR IB (100Gb/s) and 100GbE; dual-port QSFP28; PCIe4.0
x16.

Base line test description:
Single SF on the system. One instance of netperf is running on-top the
SF.
Numbers: latency = 15.136 usec, CPU Util = 35%

Test description:
There are 250 SFs on the system. There are 3 instances of netperf
running, on-top three different SFs, in parallel.

Perf numbers:
 # netperf     SFs         latency(usec)     latency    CPU utilization
   affinity    affinity    (lower is better) increase %
 1 cpu=0       cpu={0}     ~23 (app 1-3)     35%        75%
 2 cpu=0,2,4   cpu={0}     app 1: 21.625     30%        68% (CPU 0)
                           app 2-3: 16.5     9%         15% (CPU 2,4)
 3 cpu=0       cpu={0,2,4} app 1: ~16        7%         84% (CPU 0)
                           app 2-3: ~17.9    14%        22% (CPU 2,4)
 4 cpu=0,2,4   cpu={0,2,4} 15.2 (app 1-3)    0%         33% (CPU 0,2,4)

 - The first two entries (#1 and #2) show the current state, i.e. SFs are
   using the same CPU. The last two entries (#3 and #4) show the latency
   reduction achieved by this patchset, i.e. SFs are on different CPUs.
 - Whenever we use several CPUs, in case there is a different CPU
   utilization, write the utilization of each CPU separately.
 - Whenever the latency result of the netperf instances were different,
   write the latency of each netperf instances separately.

Commands:
 - for netperf CPU=0:
$ for i in {1..3}; do taskset -c 0 netperf -H 1${i}.1.1.1 -t TCP_RR  -- \
  -o RT_LATENCY -r8 & done

 - for netperf CPU=0,2,4
$ for i in {1..3}; do taskset -c $(( ($i - 1) * 2  )) netperf -H \
  1${i}.1.1.1 -t TCP_RR  -- -o RT_LATENCY -r8 & done

================

====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jan 7, 2022
2 parents e4a3d6a + 745a130 commit 26abf15
Show file tree
Hide file tree
Showing 19 changed files with 675 additions and 236 deletions.
1 change: 1 addition & 0 deletions Documentation/networking/devlink/mlx5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Parameters
- Validation
* - ``enable_roce``
- driverinit
- Type: Boolean
* - ``io_eq_size``
- driverinit
- The range is between 64 and 4096.
Expand Down
6 changes: 0 additions & 6 deletions drivers/infiniband/hw/mlx5/odp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1541,16 +1541,10 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)

eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
param = (struct mlx5_eq_param) {
.irq_index = MLX5_IRQ_EQ_CTRL,
.nent = MLX5_IB_NUM_PF_EQE,
};
param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
if (!zalloc_cpumask_var(&param.affinity, GFP_KERNEL)) {
err = -ENOMEM;
goto err_wq;
}
eq->core = mlx5_eq_create_generic(dev->mdev, &param);
free_cpumask_var(param.affinity);
if (IS_ERR(eq->core)) {
err = PTR_ERR(eq->core);
goto err_wq;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o
#
# SF device
#
mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_affinity.o

#
# SF manager
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en.h
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ struct mlx5e_profile {
};

#define mlx5e_profile_feature_cap(profile, feature) \
((profile)->features & (MLX5E_PROFILE_FEATURE_## feature))
((profile)->features & BIT(MLX5E_PROFILE_FEATURE_##feature))

void mlx5e_build_ptys2ethtool_map(void);

Expand Down
8 changes: 3 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,14 @@ static void mlx5e_hv_vhca_stats_cleanup(struct mlx5_hv_vhca_agent *agent)
cancel_delayed_work_sync(&priv->stats_agent.work);
}

int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
int buf_len = mlx5e_hv_vhca_stats_buf_size(priv);
struct mlx5_hv_vhca_agent *agent;

priv->stats_agent.buf = kvzalloc(buf_len, GFP_KERNEL);
if (!priv->stats_agent.buf)
return -ENOMEM;
return;

agent = mlx5_hv_vhca_agent_create(priv->mdev->hv_vhca,
MLX5_HV_VHCA_AGENT_STATS,
Expand All @@ -142,13 +142,11 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
PTR_ERR(agent));

kvfree(priv->stats_agent.buf);
return IS_ERR_OR_NULL(agent);
return;
}

priv->stats_agent.agent = agent;
INIT_DELAYED_WORK(&priv->stats_agent.work, mlx5e_hv_vhca_stats_work);

return 0;
}

void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
Expand Down
13 changes: 3 additions & 10 deletions drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,12 @@

#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)

int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv);
void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv);

#else

static inline int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
{
return 0;
}

static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
{
}
static inline void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) {}
static inline void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv) {}
#endif

#endif /* __MLX5_EN_STATS_VHCA_H__ */
8 changes: 0 additions & 8 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,10 @@ verify_uplink_forwarding(struct mlx5e_priv *priv,
termination_table_raw_traffic)) {
NL_SET_ERR_MSG_MOD(extack,
"devices are both uplink, can't offload forwarding");
pr_err("devices %s %s are both uplink, can't offload forwarding\n",
priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
} else if (out_dev != rep_priv->netdev) {
NL_SET_ERR_MSG_MOD(extack,
"devices are not the same uplink, can't offload forwarding");
pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
priv->netdev->name, out_dev->name);
return -EOPNOTSUPP;
}
return 0;
Expand Down Expand Up @@ -160,10 +156,6 @@ tc_act_can_offload_mirred(struct mlx5e_tc_act_parse_state *parse_state,
}

NL_SET_ERR_MSG_MOD(extack, "devices are not on same switch HW, can't offload forwarding");
netdev_warn(priv->netdev,
"devices %s %s not on same switch HW, can't offload forwarding\n",
netdev_name(priv->netdev),
out_dev->name);

return false;
}
Expand Down
19 changes: 7 additions & 12 deletions drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1883,24 +1883,19 @@ static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable,
bool is_rx_cq)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_params new_params;
bool mode_changed;
u8 cq_period_mode, current_cq_period_mode;
struct mlx5e_params new_params;

if (enable && !MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
return -EOPNOTSUPP;

cq_period_mode = cqe_mode_to_period_mode(enable);

cq_period_mode = enable ?
MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
current_cq_period_mode = is_rx_cq ?
priv->channels.params.rx_cq_moderation.cq_period_mode :
priv->channels.params.tx_cq_moderation.cq_period_mode;
mode_changed = cq_period_mode != current_cq_period_mode;

if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE &&
!MLX5_CAP_GEN(mdev, cq_period_start_from_cqe))
return -EOPNOTSUPP;

if (!mode_changed)
if (cq_period_mode == current_cq_period_mode)
return 0;

new_params = priv->channels.params;
Expand Down
10 changes: 5 additions & 5 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3605,11 +3605,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable)
new_params = priv->channels.params;

if (enable) {
if (MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
netdev_warn(netdev, "Can't set HW-GRO when CQE compress is active\n");
err = -EINVAL;
goto out;
}
new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
new_params.packet_merge.shampo.match_criteria_type =
MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
Expand Down Expand Up @@ -3871,6 +3866,11 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");

if (features & NETIF_F_GRO_HW) {
netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
features &= ~NETIF_F_GRO_HW;
}
}

if (mlx5e_is_uplink_rep(priv))
Expand Down
20 changes: 11 additions & 9 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,12 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
}
}

/* Common handling for an RX error CQE: calls trigger_report() for the CQE
 * and increments the wqe_err counter in the RQ's stats.
 */
static void mlx5e_handle_rx_err_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
trigger_report(rq, cqe);
rq->stats->wqe_err++;
}

static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
{
struct mlx5_wq_cyc *wq = &rq->wqe.wq;
Expand All @@ -1616,8 +1622,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
rq->stats->wqe_err++;
mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}

Expand Down Expand Up @@ -1670,7 +1675,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
cqe_bcnt = be32_to_cpu(cqe->byte_cnt);

if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
rq->stats->wqe_err++;
mlx5e_handle_rx_err_cqe(rq, cqe);
goto free_wqe;
}

Expand Down Expand Up @@ -1719,8 +1724,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
wi->consumed_strides += cstrides;

if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
rq->stats->wqe_err++;
mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}

Expand Down Expand Up @@ -1988,8 +1992,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
wi->consumed_strides += cstrides;

if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
stats->wqe_err++;
mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}

Expand Down Expand Up @@ -2058,8 +2061,7 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
wi->consumed_strides += cstrides;

if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
trigger_report(rq, cqe);
rq->stats->wqe_err++;
mlx5e_handle_rx_err_cqe(rq, cqe);
goto mpwrq_cqe_out;
}

Expand Down
101 changes: 98 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "en_accel/tls.h"
#include "en_accel/en_accel.h"
#include "en/ptp.h"
#include "en/port.h"

static unsigned int stats_grps_num(struct mlx5e_priv *priv)
{
Expand Down Expand Up @@ -1158,12 +1159,99 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy)
mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
}

void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
struct ethtool_fec_stats *fec_stats)
/* Query the PMLP register for local port 1 and return its 'width' field.
 * Returns 0 when the register access fails.
 */
static int fec_num_lanes(struct mlx5_core_dev *dev)
{
	u32 pmlp_in[MLX5_ST_SZ_DW(pmlp_reg)] = {};
	u32 pmlp_out[MLX5_ST_SZ_DW(pmlp_reg)] = {};

	MLX5_SET(pmlp_reg, pmlp_in, local_port, 1);

	if (mlx5_core_access_reg(dev, pmlp_in, sizeof(pmlp_in),
				 pmlp_out, sizeof(pmlp_out),
				 MLX5_REG_PMLP, 0, 0))
		return 0;

	return MLX5_GET(pmlp_reg, pmlp_out, width);
}

/* Return the index of the lowest set bit in the active-FEC bitmask reported
 * by mlx5e_get_fec_mode(), or MLX5E_FEC_NOFEC if that query fails.
 */
static int fec_active_mode(struct mlx5_core_dev *mdev)
{
	unsigned long active_bits;
	u32 active_mask;

	if (mlx5e_get_fec_mode(mdev, &active_mask, NULL) != 0)
		return MLX5E_FEC_NOFEC;

	/* find_first_bit() operates on unsigned long words, so widen first. */
	active_bits = active_mask;

	return find_first_bit(&active_bits,
			      BITS_PER_BYTE * sizeof(unsigned long));
}

/* Store the fc_fec_corrected_blocks_lane<idx> and
 * fc_fec_uncorrectable_blocks_lane<idx> counters of the phys_layer_cntrs
 * set into lanes[idx] of the corresponding fec_stats members.
 *
 * Not hygienic: expects variables named 'fec_stats' and 'ppcnt' to be in
 * scope at the expansion site. 'idx' is token-pasted into the counter field
 * name, so it must be a literal lane number.
 */
#define MLX5E_STATS_SET_FEC_BLOCK(idx) ({ \
fec_stats->corrected_blocks.lanes[(idx)] = \
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
fc_fec_corrected_blocks_lane##idx); \
fec_stats->uncorrectable_blocks.lanes[(idx)] = \
MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs, \
fc_fec_uncorrectable_blocks_lane##idx); \
})

/* Fill the per-lane block counters in @fec_stats from the PPCNT buffer
 * @ppcnt for the first @lanes lanes.
 *
 * Only widths of 1, 2 and 4 lanes are distinguished: a value of 3 is
 * handled like 2, and anything above 4 like 4.
 *
 * The parameter names 'fec_stats' and 'ppcnt' are referenced by
 * MLX5E_STATS_SET_FEC_BLOCK() and must not be renamed.
 */
static void fec_set_fc_stats(struct ethtool_fec_stats *fec_stats,
			     u32 *ppcnt, u8 lanes)
{
	if (lanes == 0)
		return;
	MLX5E_STATS_SET_FEC_BLOCK(0);

	if (lanes == 1)
		return;
	MLX5E_STATS_SET_FEC_BLOCK(1);

	if (lanes <= 3)
		return;
	MLX5E_STATS_SET_FEC_BLOCK(2);
	MLX5E_STATS_SET_FEC_BLOCK(3);
}

/* Copy the rs_fec_corrected_blocks and rs_fec_uncorrectable_blocks counters
 * of the phys_layer_cntrs set in @ppcnt into the 'total' fields of
 * @fec_stats.
 */
static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt)
{
	u64 corrected = MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
					      rs_fec_corrected_blocks);
	u64 uncorrectable = MLX5E_READ_CTR64_BE_F(ppcnt, phys_layer_cntrs,
						  rs_fec_uncorrectable_blocks);

	fec_stats->corrected_blocks.total = corrected;
	fec_stats->uncorrectable_blocks.total = uncorrectable;
}

/* Fill the FEC block counters of @fec_stats according to the active FEC mode.
 *
 * Reads the PPCNT physical-layer counters group for local port 1 and then:
 *  - for the RS/LLRS modes, sets the corrected/uncorrectable block totals;
 *  - for MLX5E_FEC_FIRECODE, sets the per-lane block counters for the number
 *    of lanes reported by fec_num_lanes().
 *
 * @fec_stats is left untouched when no FEC is active, when the register
 * access fails, or when the active mode is none of the above.
 */
static void fec_set_block_stats(struct mlx5e_priv *priv,
				struct ethtool_fec_stats *fec_stats)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	int mode = fec_active_mode(mdev);

	if (mode == MLX5E_FEC_NOFEC)
		return;

	MLX5_SET(ppcnt_reg, in, local_port, 1);
	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
	/* The response must land in 'out', the buffer parsed below. The
	 * previous code passed the undeclared identifier 'outl' here.
	 */
	if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0))
		return;

	switch (mode) {
	case MLX5E_FEC_RS_528_514:
	case MLX5E_FEC_RS_544_514:
	case MLX5E_FEC_LLRS_272_257_1:
		fec_set_rs_stats(fec_stats, out);
		return;
	case MLX5E_FEC_FIRECODE:
		fec_set_fc_stats(fec_stats, out, fec_num_lanes(mdev));
		return;
	default:
		/* No block counters are reported for other modes. */
		return;
	}
}

static void fec_set_corrected_bits_total(struct mlx5e_priv *priv,
struct ethtool_fec_stats *fec_stats)
{
u32 ppcnt_phy_statistical[MLX5_ST_SZ_DW(ppcnt_reg)];
struct mlx5_core_dev *mdev = priv->mdev;
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0};
u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);

if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
Expand All @@ -1181,6 +1269,13 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
phy_corrected_bits);
}

/* Populate @fec_stats for @priv by calling fec_set_corrected_bits_total()
 * and then fec_set_block_stats().
 */
void mlx5e_stats_fec_get(struct mlx5e_priv *priv,
struct ethtool_fec_stats *fec_stats)
{
fec_set_corrected_bits_total(priv, fec_stats);
fec_set_block_stats(priv, fec_stats);
}

#define PPORT_ETH_EXT_OFF(c) \
MLX5_BYTE_OFF(ppcnt_reg, \
counter_set.eth_extended_cntrs_grp_data_layout.c##_high)
Expand Down
Loading

0 comments on commit 26abf15

Please sign in to comment.