From 19f5b63bc9932d51292d72c9dc3ec95e5dfa2289 Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Thu, 16 Jul 2020 14:59:30 +0300 Subject: [PATCH 01/16] net/mlx5: Fix uninitialized variable warning Add variable initialization to eliminate the warning "variable may be used uninitialized". Fixes: 5f29458b77d5 ("net/mlx5e: Support dump callback in TX reporter") Signed-off-by: Moshe Tal Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 3dc200bcfabde..69a05da0e3e3d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -242,8 +242,8 @@ static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg, { u32 data_size; + int err = 0; u32 offset; - int err; for (offset = 0; offset < value_len; offset += data_size) { data_size = value_len - offset; From ec529b44abfe06a948115e48da02bc59d871242c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Aug 2020 17:34:48 +0300 Subject: [PATCH 02/16] net/mlx5: remove erroneous fallthrough This isn't a fall through because it was after a return statement. The fall through annotation leads to a Smatch warning: drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c:246 mlx5e_ethtool_get_sset_count() warn: ignoring unreachable code. Signed-off-by: Dan Carpenter Reviewed-by: Gustavo A. R. 
Silva --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5cb1e4839eb79..e2f092e6da3ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -243,7 +243,6 @@ int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) return MLX5E_NUM_PFLAGS; case ETH_SS_TEST: return mlx5e_self_test_num(priv); - fallthrough; default: return -EOPNOTSUPP; } From fb609b5112bd74b4ba93c86d7af4089ffd9432c2 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Wed, 13 May 2020 11:06:47 +0300 Subject: [PATCH 03/16] net/mlx5: Always use container_of to find mdev pointer from clock struct Clock struct is part of struct mlx5_core_dev. Code was inconsistent: in some cases it used container_of and in others it used clock->mdev. Align code to use container_of and remove clock->mdev pointer. While here, fix reverse xmas tree coding style. 
Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh --- .../ethernet/mellanox/mlx5/core/lib/clock.c | 52 +++++++++++-------- include/linux/mlx5/driver.h | 1 - 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 2d55b7c22c034..a07aeb97d027c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -150,28 +150,30 @@ static void mlx5_pps_out(struct work_struct *work) static void mlx5_timestamp_overflow(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); - struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock, - overflow_work); + struct mlx5_core_dev *mdev; + struct mlx5_clock *clock; unsigned long flags; + clock = container_of(dwork, struct mlx5_clock, overflow_work); + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); u64 ns = timespec64_to_ns(ts); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&clock->tc, &clock->cycles, ns); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -180,13 +182,12 @@ static int 
mlx5_ptp_settime(struct ptp_clock_info *ptp, static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct ptp_system_timestamp *sts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; u64 cycles, ns; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); cycles = mlx5_read_internal_timer(mdev, sts); ns = timecounter_cyc2time(&clock->tc, cycles); @@ -199,13 +200,14 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&clock->tc, delta); - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -213,12 +215,13 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) { - u64 adj; - u32 diff; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; unsigned long flags; int neg_adj = 0; - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, - ptp_info); + u32 diff; + u64 adj; + if (delta < 0) { neg_adj = 1; @@ -229,11 +232,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); + mdev = container_of(clock, struct mlx5_core_dev, clock); 
write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; - mlx5_update_clock_info_page(clock->mdev); + mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); return 0; @@ -465,7 +469,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) { - struct mlx5_core_dev *mdev = clock->mdev; + struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); + u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -538,15 +543,17 @@ static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); - struct mlx5_core_dev *mdev = clock->mdev; struct ptp_clock_event ptp_event; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta, ns; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; + struct mlx5_core_dev *mdev; struct timespec64 ts; unsigned long flags; + mdev = container_of(clock, struct mlx5_core_dev, clock); + switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; @@ -605,7 +612,6 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) clock->cycles.shift); clock->nominal_c_mult = clock->cycles.mult; clock->cycles.mask = CLOCKSOURCE_MASK(41); - clock->mdev = mdev; timecounter_init(&clock->tc, &clock->cycles, ktime_to_ns(ktime_get_real())); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c145de0473bc1..8dc3da6e6480e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -643,7 +643,6 @@ struct mlx5_pps { }; struct mlx5_clock { - struct mlx5_core_dev *mdev; struct mlx5_nb pps_nb; seqlock_t lock; struct cyclecounter cycles; From aac2df7f022eccb5d117f07b1e231410db1a863a Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 9 Jun 2020 10:58:31 +0300 
Subject: [PATCH 04/16] net/mlx5: Rename ptp clock info Fix a typo in ptp_clock_info naming: mlx5_p2p -> mlx5_ptp. Signed-off-by: Eran Ben Elisha --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index a07aeb97d027c..b62daf7b9a5cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -441,7 +441,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, - .name = "mlx5_p2p", + .name = "mlx5_ptp", .max_adj = 100000000, .n_alarm = 0, .n_ext_ts = 0, From 87f3495cbe8d1d34ce430b11b1ea34e69e6f4126 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 19 May 2020 12:00:57 +0300 Subject: [PATCH 05/16] net/mlx5: Release clock lock before scheduling a PPS work Holding the clock lock is not required when scheduling a PPS work. 
Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index b62daf7b9a5cd..f8465e42b238d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -581,8 +581,8 @@ static int mlx5_pps_event(struct notifier_block *nb, cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, clock->cycles.mult); clock->pps_info.start[pin] = cycles_now + cycles_delta; - schedule_work(&clock->pps_info.out_work); write_sequnlock_irqrestore(&clock->lock, flags); + schedule_work(&clock->pps_info.out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", From 0d2ffdc8d4002a62de31ff7aa3bef28c843c3cbe Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 16 Jun 2020 12:07:10 +0300 Subject: [PATCH 06/16] net/mlx5: Don't call timecounter cyc2time directly from 1PPS flow Before calling timecounter_cyc2time(), clock->lock must be taken. Use mlx5_timecounter_cyc2time instead which guarantees a safe access. 
Fixes: afc98a0b46d8 ("net/mlx5: Update ptp_clock_event foreach PPS event") Signed-off-by: Eran Ben Elisha --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index f8465e42b238d..7fc59e01a3539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -557,8 +557,9 @@ static int mlx5_pps_event(struct notifier_block *nb, switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; - ptp_event.timestamp = timecounter_cyc2time(&clock->tc, - be64_to_cpu(eqe->data.pps.time_stamp)); + ptp_event.timestamp = + mlx5_timecounter_cyc2time(clock, + be64_to_cpu(eqe->data.pps.time_stamp)); if (clock->pps_info.enabled) { ptp_event.type = PTP_CLOCK_PPSUSR; ptp_event.pps_times.ts_real = From f552be54e0d601c7d797ad03f91c539c69f88a32 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 30 Apr 2020 01:59:20 +0000 Subject: [PATCH 07/16] net/mlx5e: Return a valid errno if can't get lag device index Change the return value to -ENOENT, to make it more meaningful. 
Signed-off-by: Jianbo Liu Reviewed-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 7 ++++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 874c70e8cc540..8b6e2aae27834 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -102,7 +102,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, if (ldev->pf[i].netdev == ndev) return i; - return -1; + return -ENOENT; } static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) @@ -374,7 +374,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx > -1) + if (idx >= 0) bond_status |= (1 << idx); num_slaves++; @@ -418,7 +418,7 @@ static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, return 0; idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); - if (idx == -1) + if (idx < 0) return 0; /* This information is used to determine virtual to physical diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 9e68f5926ab6e..d192d25cff33e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -131,7 +131,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); - mlx5_lag_set_port_affinity(ldev, ++i); + if (i < 0) + i = MLX5_LAG_NORMAL_AFFINITY; + else + ++i; + + mlx5_lag_set_port_affinity(ldev, i); } return; } From 1a3c91148339dd334e8e15ca9397c92d32c5cb0f Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 17 Apr 2020 02:55:46 +0000 Subject: [PATCH 08/16] net/mlx5e: Add LAG warning 
for unsupported tx type If bond tx type is not active-backup or hash, LAG offload can't be enabled. When CHANGEUPPER event is received, and both PFs (and only them) under the same lag master are about to be enslaved, a warning is returned for user to know the offload failure, otherwise PFs are enslaved silently without LAG offload activated. Signed-off-by: Jianbo Liu Reviewed-by: Raed Salem Reviewed-by: Roi Dayan Reviewed-by: Jiri Pirko Reviewed-by: Raed Salem Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 8b6e2aae27834..191d3d5be46d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -355,7 +355,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, { struct net_device *upper = info->upper_dev, *ndev_tmp; struct netdev_lag_upper_info *lag_upper_info = NULL; - bool is_bonded; + bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; int idx; @@ -391,13 +391,18 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, /* Determine bonding status: * A device is considered bonded if both its physical ports are slaves * of the same lag master, and only them. - * Lag mode must be activebackup or hash. */ - is_bonded = (num_slaves == MLX5_MAX_PORTS) && - (bond_status == 0x3) && - ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || - (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + /* Lag mode must be activebackup or hash. 
*/ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + if (is_in_lag && !mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; return 1; From 9b412cc35f0025536a1f1e951a7f4cead0ed15dc Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Fri, 17 Apr 2020 08:53:16 +0000 Subject: [PATCH 09/16] net/mlx5e: Add LAG warning if bond slave is not lag master LAG offload can't be enabled if the enslaved PF is not lag master, which is indicated by HCA capabilities bit. It is cleared if more than 64 VFs are configured for this PF. Previously, a data structure is created to store lag info, including PFs to be enslaved, then a handler is registered for netdev notifier. However, this initialization is skipped if PF is not lag master. So PF can't handle CHANGEUPPER event from upper bond device. Even worse, PF is enslaved silently, and LAG offload is not activated. Fix this by registering netdev notifier for PFs which are not lag masters. When CHANGEUPPER event is received, and both physical ports (and only them) on the same NIC are about to be enslaved, a warning is returned for user to know it. 
Signed-off-by: Jianbo Liu Reviewed-by: Raed Salem Reviewed-by: Roi Dayan Reviewed-by: Jiri Pirko Reviewed-by: Raed Salem Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 43 ++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/lag.h | 7 +++ .../net/ethernet/mellanox/mlx5/core/lag_mp.c | 2 +- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 191d3d5be46d4..33081b24f10aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -271,7 +271,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) bool do_bond, roce_lag; int err; - if (!dev0 || !dev1) + if (!mlx5_lag_is_ready(ldev)) return; spin_lock(&lag_lock); @@ -394,6 +394,12 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + if (!mlx5_lag_is_ready(ldev) && is_in_lag) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + return 0; + } + /* Lag mode must be activebackup or hash. 
*/ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; @@ -450,6 +456,10 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, return NOTIFY_DONE; ldev = container_of(this, struct mlx5_lag, nb); + + if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) + return NOTIFY_DONE; + tracker = ldev->tracker; switch (event) { @@ -498,14 +508,14 @@ static void mlx5_lag_dev_free(struct mlx5_lag *ldev) kfree(ldev); } -static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) +static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) { unsigned int fn = PCI_FUNC(dev->pdev->devfn); if (fn >= MLX5_MAX_PORTS) - return; + return -EPERM; spin_lock(&lag_lock); ldev->pf[fn].dev = dev; @@ -516,6 +526,8 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, dev->priv.lag = ldev; spin_unlock(&lag_lock); + + return fn; } static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, @@ -542,11 +554,9 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; - int err; + int i, err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + if (!MLX5_CAP_GEN(dev, vport_group_manager)) return; tmp_dev = mlx5_get_next_phys_dev(dev); @@ -561,7 +571,18 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) } } - mlx5_lag_dev_add_pf(ldev, dev, netdev); + if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0) + return; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + tmp_dev = ldev->pf[i].dev; + if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) || + MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS) + break; + } + + if (i >= MLX5_MAX_PORTS) + ldev->flags |= MLX5_LAG_FLAG_READY; if (!ldev->nb.notifier_call) { 
ldev->nb.notifier_call = mlx5_lag_netdev_event; @@ -592,6 +613,8 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) mlx5_lag_dev_remove_pf(ldev, dev); + ldev->flags &= ~MLX5_LAG_FLAG_READY; + for (i = 0; i < MLX5_MAX_PORTS; i++) if (ldev->pf[i].dev) break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index f1068aac64067..8d8cf2d0bc6d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -16,6 +16,7 @@ enum { MLX5_LAG_FLAG_ROCE = 1 << 0, MLX5_LAG_FLAG_SRIOV = 1 << 1, MLX5_LAG_FLAG_MULTIPATH = 1 << 2, + MLX5_LAG_FLAG_READY = 1 << 3, }; #define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ @@ -59,6 +60,12 @@ __mlx5_lag_is_active(struct mlx5_lag *ldev) return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); } +static inline bool +mlx5_lag_is_ready(struct mlx5_lag *ldev) +{ + return ldev->flags & MLX5_LAG_FLAG_READY; +} + void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index d192d25cff33e..88e58ac902def 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -11,7 +11,7 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) + if (!mlx5_lag_is_ready(ldev)) return false; return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, From 4e9a9ef7d8a977596e8de8d917bac973d34a9171 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Tue, 23 Jun 2020 12:32:31 -0700 Subject: [PATCH 10/16] net/mlx5: E-Switch, Check and enable metadata support flag before using Check E-Switch capabilities and enable metadata support flag before using it to setup other features that need metadata. 
Signed-off-by: Vu Pham Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index b381cbca5852c..4cbadb15297c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1968,16 +1968,9 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; - int err; - - if (esw_use_vport_metadata(esw)) - esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); - err = esw_vport_create_offloads_acl_tables(esw, vport); - if (err) - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; - return err; + return esw_vport_create_offloads_acl_tables(esw, vport); } static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) @@ -1986,7 +1979,6 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); esw_vport_destroy_offloads_acl_tables(esw, vport); - esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw) @@ -2146,6 +2138,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; + if (esw_use_vport_metadata(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; @@ -2178,6 +2173,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) err_steering_init: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); 
mutex_destroy(&esw->offloads.termtbl_mutex); return err; @@ -2211,6 +2207,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; From 406493a52f48a20b36b34701d541081cae00b900 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Tue, 23 Jun 2020 11:53:34 -0700 Subject: [PATCH 11/16] net/mlx5: E-Switch, Dedicated metadata for uplink vport Uplink vport must have a dedicated metadata with vhca_id being part of the metadata. Fixes: 133dcfc577ea ("net/mlx5: E-Switch, Alloc and free unique metadata for match") Signed-off-by: Vu Pham Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4cbadb15297c8..9c740ce73085f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1908,9 +1908,6 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata) static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK) - return 0; - vport->default_metadata = mlx5_esw_match_metadata_alloc(esw); vport->metadata = vport->default_metadata; return vport->metadata ? 
0 : -ENOSPC; @@ -1919,7 +1916,7 @@ static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw, static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata) + if (!vport->default_metadata) return; WARN_ON(vport->metadata != vport->default_metadata); From fc99c3d6371c14343b1822cc8846ed3a54dbafe3 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Fri, 22 May 2020 11:48:38 -0700 Subject: [PATCH 12/16] net/mlx5: E-Switch, Setup all vports' metadata to support peer miss rule In merged eswitch configuration, peer miss rule is setup for all vports. If metadata is enabled, peer miss rule with metadata matching will be configured instead of source port matching; however, some vports that have not yet been enabled don't have default_metadata setup and their default_metadata will be zero. Hence, setup/cleanup default metadata for all vports when eswitch moves in/out of offloads mode. Fixes: 133dcfc577ea ("net/mlx5: E-Switch, Alloc and free unique metadata for match") Signed-off-by: Vu Pham Reviewed-by: Bodong Wang Reviewed-by: Roi Dayan Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9c740ce73085f..3321bb1f188d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1923,19 +1923,49 @@ static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw, mlx5_esw_match_metadata_free(esw, vport->default_metadata); } +static void esw_offloads_metadata_uninit(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return; + + 
mlx5_esw_for_all_vports_reverse(esw, i, vport) + esw_offloads_vport_metadata_cleanup(esw, vport); +} + +static int esw_offloads_metadata_init(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int err; + int i; + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) + return 0; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_offloads_vport_metadata_setup(esw, vport); + if (err) + goto metadata_err; + } + + return 0; + +metadata_err: + esw_offloads_metadata_uninit(esw); + return err; +} + int esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; - err = esw_offloads_vport_metadata_setup(esw, vport); - if (err) - goto metadata_err; - err = esw_acl_ingress_ofld_setup(esw, vport); if (err) - goto ingress_err; + return err; if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_acl_egress_ofld_setup(esw, vport); @@ -1947,9 +1977,6 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw, egress_err: esw_acl_ingress_ofld_cleanup(esw, vport); -ingress_err: - esw_offloads_vport_metadata_cleanup(esw, vport); -metadata_err: return err; } @@ -1959,7 +1986,6 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw, { esw_acl_egress_ofld_cleanup(vport); esw_acl_ingress_ofld_cleanup(esw, vport); - esw_offloads_vport_metadata_cleanup(esw, vport); } static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) @@ -2138,6 +2164,10 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (esw_use_vport_metadata(esw)) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + err = esw_offloads_metadata_init(esw); + if (err) + goto err_metadata; + err = esw_set_passing_vport_metadata(esw, true); if (err) goto err_vport_metadata; @@ -2170,6 +2200,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) err_steering_init: esw_set_passing_vport_metadata(esw, false); err_vport_metadata: + esw_offloads_metadata_uninit(esw); +err_metadata: esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; 
mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); @@ -2204,6 +2236,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); esw_offloads_steering_cleanup(esw); + esw_offloads_metadata_uninit(esw); esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; mlx5_rdma_disable_roce(esw->dev); mutex_destroy(&esw->offloads.termtbl_mutex); From cd1ef966214c9f83af24e65593eb34186b31bea3 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 18 May 2020 15:02:45 -0700 Subject: [PATCH 13/16] net/mlx5: E-Switch, Use vport metadata matching by default Multiple features use metadata matching such as bond vport in live migration, multi-port RoCE mode, stacked devices; hence, enable vport metadata matching by default. Fixes: 1e62e222db2e ("net/mlx5: E-Switch, Use vport metadata matching only when mandatory") Signed-off-by: Vu Pham Reviewed-by: Bodong Wang Reviewed-by: Roi Dayan Reviewed-by: Parav Pandit Reviewed-by: Mark Bloch Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/eswitch_offloads.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3321bb1f188d4..b23d20e164958 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1864,18 +1864,6 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) return true; } -static bool -esw_check_vport_match_metadata_mandatory(const struct mlx5_eswitch *esw) -{ - return mlx5_core_mp_enabled(esw->dev); -} - -static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw) -{ - return esw_check_vport_match_metadata_mandatory(esw) && - esw_check_vport_match_metadata_supported(esw); -} - u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw) { 
u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1; @@ -2159,9 +2147,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) err = mlx5_esw_host_number_init(esw); if (err) - goto err_vport_metadata; + goto err_metadata; - if (esw_use_vport_metadata(esw)) + if (esw_check_vport_match_metadata_supported(esw)) esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; err = esw_offloads_metadata_init(esw); From f02882102b1d82e1863c1afb92ca6adf3cfd2ccc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 23 Aug 2020 15:54:43 +0300 Subject: [PATCH 14/16] net/mlx5e: Add support for tc trap Support tc trap such that packets can explicitly be forwarded to slow path if they match a specific rule. In the example below, we want packets with src IP equals 7.7.7.8 to be forwarded to software, in which case it will get to the appropriate representor net device. $ tc filter add dev eth1 protocol ip prio 1 root flower skip_sw \ src_ip 7.7.7.8 action trap Signed-off-by: Eli Cohen Reviewed-by: Roi Dayan Reviewed-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd53d101d8fd2..2dded22a64a34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3943,6 +3943,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; break; + case FLOW_ACTION_TRAP: + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, + "action trap is supported as a sole action only"); + return -EOPNOTSUPP; + } + action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT); + attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH; + break; case FLOW_ACTION_MPLS_PUSH: if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, 
reformat_l2_to_l3_tunnel) || From 748cde9a3802e1ababedabbf759b3eedbaeaba52 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Thu, 3 Sep 2020 11:02:10 +0300 Subject: [PATCH 15/16] net/mlx5e: Add IPv6 traffic class (DSCP) header rewrite support Add support for rewriting of IPV6 DSCP part of traffic class field. Next commands, for example, can be used to offload rewrite action: OVS: $ ovs-ofctl add-flow ovs-sriov "tcpv6, in_port=REP, \ actions=mod_nw_tos:68, output:NIC" iproute2: $ tc filter add dev REP ingress protocol ipv6 prio 1 flower skip_sw \ ip_proto tcp \ action pedit ex munge ip6 traffic_class set 68 retain 0xfc pipe \ action mirred egress redirect dev NIC Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 2dded22a64a34..817c503693fcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2615,6 +2615,7 @@ static struct mlx5_fields fields[] = { OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0, dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]), OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit), + OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp), OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport), OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport), From b7cf0806e8783e38f881cae3c56f0869e70b8da2 Mon Sep 17 00:00:00 2001 From: Ofer Levi Date: Sun, 17 May 2020 10:16:49 +0300 Subject: [PATCH 16/16] net/mlx5e: Add CQE compression support for multi-strides packets Add CQE compression support for completions of packets that span multiple strides in a Striding RQ, per the HW capability. In our memory model, we use small strides (256B as of today) for the non-linear SKB mode. 
This feature allows CQE compression to work for multi-stride packets as well. In this case, decompressing the mini CQE array uses the stride index provided by HW as part of the mini CQE. Before this feature, compression was possible only for single-stride packets, i.e. packets of up to 256 bytes in non-linear mode, and the index was maintained by SW. This feature is supported on ConnectX-5 and above. Feature performance test: This was whitebox-tested: we reduced the PCI speed from 125Gb/s to 62.5Gb/s to overload the PCI bus, and modified the mlx5 driver to drop incoming packets before building the SKB in order to keep CPU utilization low. The outcome is low CPU utilization with PCI as the only bottleneck. Test setup: Server: Intel(R) Xeon(R) Silver 4108 CPU @ 1.80GHz, 32 cores. NIC: ConnectX-6 DX. The sender side generates 300-byte packets at full PCI bandwidth. Receiver side configuration: single channel, one CPU processing with one ring allocated. CPU utilization is ~20% while PCI bandwidth is fully utilized. For the generated traffic and an interface MTU of 4500B (to activate the non-linear SKB mode), the packet rate improves by about 19%, from ~17.6Mpps to ~21Mpps. Without this feature, counters show no CQE compression blocks for this setup, while with the feature, counters show ~20.7Mpps compressed CQEs in ~500K compression blocks. 
Signed-off-by: Ofer Levi Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 +++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 ++++++++++- include/linux/mlx5/device.h | 3 ++- 4 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 4f33658da25a2..95aab8b429cf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -265,6 +265,7 @@ enum { MLX5E_RQ_STATE_NO_CSUM_COMPLETE, MLX5E_RQ_STATE_CSUM_FULL, /* cqe_csum_full hw bit is set */ MLX5E_RQ_STATE_FPGA_TLS, /* FPGA TLS enabled */ + MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX /* set when mini_cqe_resp_stride_index cap is used */ }; struct mlx5e_cq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 26834625556df..b057a6c3a6d5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -848,6 +848,13 @@ int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + /* For CQE compression on striding RQ, use stride index provided by + * HW if capability is supported. 
+ */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) && + MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index)) + __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state); + return 0; err_destroy_rq: @@ -2182,6 +2189,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; + bool hw_stridx = false; void *cqc = param->cqc; u8 log_cq_size; @@ -2189,6 +2197,7 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2196,7 +2205,8 @@ void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { - MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ? 
+ MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7aab69e991a56..c9c82b14060a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -137,8 +137,17 @@ static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, title->check_sum = mini_cqe->checksum; title->op_own &= 0xf0; title->op_own |= 0x01 & (cqcc >> wq->fbc.log_sz); - title->wqe_counter = cpu_to_be16(cqd->wqe_counter); + /* state bit set implies linked-list striding RQ wq type and + * HW stride index capability supported + */ + if (test_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state)) { + title->wqe_counter = mini_cqe->stridx; + return; + } + + /* HW stride index capability not supported */ + title->wqe_counter = cpu_to_be16(cqd->wqe_counter); if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) cqd->wqe_counter += mpwrq_get_cqe_consumed_strides(title); else diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 4d3376e20f5e6..81ca5989009b4 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -816,7 +816,7 @@ struct mlx5_mini_cqe8 { __be32 rx_hash_result; struct { __be16 checksum; - __be16 rsvd; + __be16 stridx; }; struct { __be16 wqe_counter; @@ -836,6 +836,7 @@ enum { enum { MLX5_CQE_FORMAT_CSUM = 0x1, + MLX5_CQE_FORMAT_CSUM_STRIDX = 0x3, }; #define MLX5_MINI_CQE_ARRAY_SIZE 8