From b3ea4c4fdc673acbb4d8333b7f4c9bd3a9287730 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 6 Apr 2020 16:00:35 +0300 Subject: [PATCH 01/12] net/mlx5e: Change reporters create functions to return void Creation of devlink health reporters is not fatal for mlx5e instance load. In case of error in reporter's creation, the return value is ignored. Change all reporters creation functions to return void. In addition, with this change, a failure in creating a reporter, will not prevent the driver from trying to create the next reporter in the list. Signed-off-by: Eran Ben Elisha Reviewed-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/health.c | 15 +++------------ .../net/ethernet/mellanox/mlx5/core/en/health.h | 6 +++--- .../ethernet/mellanox/mlx5/core/en/reporter_rx.c | 5 ++--- .../ethernet/mellanox/mlx5/core/en/reporter_tx.c | 5 ++--- 4 files changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 7283443868f3c..d0625ee923d6d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -97,19 +97,10 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * return 0; } -int mlx5e_health_create_reporters(struct mlx5e_priv *priv) +void mlx5e_health_create_reporters(struct mlx5e_priv *priv) { - int err; - - err = mlx5e_reporter_tx_create(priv); - if (err) - return err; - - err = mlx5e_reporter_rx_create(priv); - if (err) - return err; - - return 0; + mlx5e_reporter_tx_create(priv); + mlx5e_reporter_rx_create(priv); } void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 38f97f79ef167..895d03d56c9d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -16,7 +16,7 @@ static inline bool cqe_syndrome_needs_recover(u8 syndrome) syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; } -int mlx5e_reporter_tx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv); void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); @@ -26,7 +26,7 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg); -int mlx5e_reporter_rx_create(struct mlx5e_priv *priv); +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv); void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq); void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); @@ -46,7 +46,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv); int mlx5e_health_report(struct mlx5e_priv *priv, struct devlink_health_reporter *reporter, char *err_str, struct mlx5e_err_ctx *err_ctx); -int mlx5e_health_create_reporters(struct mlx5e_priv *priv); +void mlx5e_health_create_reporters(struct mlx5e_priv *priv); void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv); void mlx5e_health_channels_update(struct mlx5e_priv *priv); int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index c209579fc213b..5161a1954577f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -563,7 +563,7 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { #define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 -int 
mlx5e_reporter_rx_create(struct mlx5e_priv *priv) +void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { struct devlink *devlink = priv_to_devlink(priv->mdev); struct devlink_health_reporter *reporter; @@ -575,10 +575,9 @@ int mlx5e_reporter_rx_create(struct mlx5e_priv *priv) if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", PTR_ERR(reporter)); - return PTR_ERR(reporter); + return; } priv->rx_reporter = reporter; - return 0; } void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 9805fc0855124..b95dc15f23b94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -406,7 +406,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 -int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) +void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; @@ -421,10 +421,9 @@ int mlx5e_reporter_tx_create(struct mlx5e_priv *priv) netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", PTR_ERR(reporter)); - return PTR_ERR(reporter); + return; } priv->tx_reporter = reporter; - return 0; } void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) From e74e28aee1a2382814eae8249651cc4747460d9f Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 18:50:49 +0300 Subject: [PATCH 02/12] net/mlx5e: Add a flush timeout define During queue's recovery, driver waits for flush. The flush timeout is set to 2 seconds. Add a define for this value for the benefit of RX and TX reporters. 
Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 895d03d56c9d2..2938553a7606b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -33,6 +33,7 @@ void mlx5e_reporter_rq_cqe_err(struct mlx5e_rq *rq); void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq); #define MLX5E_REPORTER_PER_Q_MAX_LEN 256 +#define MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC 2000 struct mlx5e_err_ctx { int (*recover)(void *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 5161a1954577f..495a3e6bf82bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -29,7 +29,8 @@ static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) static int mlx5e_wait_for_icosq_flush(struct mlx5e_icosq *icosq) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + unsigned long exp_time = jiffies + + msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); while (time_before(jiffies, exp_time)) { if (icosq->cc == icosq->pc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index b95dc15f23b94..6eb2971231d8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -5,7 +5,8 @@ static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) { - unsigned long exp_time = jiffies + msecs_to_jiffies(2000); + unsigned long exp_time = jiffies + + 
msecs_to_jiffies(MLX5E_REPORTER_FLUSH_TIMEOUT_MSEC); while (time_before(jiffies, exp_time)) { if (sq->cc == sq->pc) From b9961af7b8acf6420845cd1660228fe374710df9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 11:24:41 +0300 Subject: [PATCH 03/12] net/mlx5e: Remove redundant RQ state query When a CQE error is received, the driver inspects the syndrome given by the firmware. RQ recovery is initiated only as a result of a fatal syndrome, i.e. a syndrome which sets the RQ into an error state. Hence there is no need to query the RQ state at the beginning of the recovery process. Add additional debug prints before recovering. Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/reporter_rx.c | 18 +----------------- .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 6 +++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++- 3 files changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 495a3e6bf82bc..b8b32aef13637 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -124,25 +124,9 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state) static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) { - struct mlx5_core_dev *mdev; - struct net_device *dev; - struct mlx5e_rq *rq; - u8 state; + struct mlx5e_rq *rq = ctx; int err; - rq = ctx; - mdev = rq->mdev; - dev = rq->netdev; - err = mlx5e_query_rq_state(mdev, rq->rqn, &state); - if (err) { - netdev_err(dev, "Failed to query RQ 0x%x state. 
err = %d\n", - rq->rqn, err); - goto out; - } - - if (state != MLX5_RQC_STATE_ERR) - goto out; - mlx5e_deactivate_rq(rq); mlx5e_free_rx_descs(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index e9d4a61b6bbb2..be7692897fc1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -268,7 +268,7 @@ static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) } } -static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn, +static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn, struct mlx5_err_cqe *err_cqe) { struct mlx5_cqwq *wq = &cq->wq; @@ -277,8 +277,8 @@ static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn, ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1); netdev_err(cq->channel->netdev, - "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", - cq->mcq.cqn, ci, sqn, + "Error cqe on cqn 0x%x, ci 0x%x, qn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n", + cq->mcq.cqn, ci, qn, get_cqe_opcode((struct mlx5_cqe64 *)err_cqe), err_cqe->syndrome, err_cqe->vendor_err_synd); mlx5_dump_err_cqe(cq->mdev, err_cqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8b42f729a4f77..350f9c54e508f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1187,8 +1187,10 @@ static void trigger_report(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; if (cqe_syndrome_needs_recover(err_cqe->syndrome) && - !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) + !test_and_set_bit(MLX5E_RQ_STATE_RECOVERING, &rq->state)) { + mlx5e_dump_error_cqe(&rq->cq, rq->rqn, err_cqe); queue_work(rq->channel->priv->wq, &rq->recover_work); + } } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 
*cqe) From 4537f524b4401ae9a741b849c5e0d49c9b2b6803 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 12:18:22 +0300 Subject: [PATCH 04/12] net/mlx5e: Align RX/TX reporters diagnose output format Change the hierarchy of the RX reporter 'Common config' in the diagnose output to match the 'Common config' of the TX reporter which reflects that CQ is a helper to the traffic queues. Before: $ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: ... After: $ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: ... Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index b8b32aef13637..f1edde1ab8bc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -284,11 +284,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); if (err) goto unlock; - err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); + err = mlx5e_reporter_named_obj_nest_end(fmsg); if (err) goto unlock; From 5d95c816608cd6e7ccfe3d4c8083465973b9f3d1 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 13:30:51 +0300 Subject: [PATCH 05/12] net/mlx5e: Move RQ helpers to txrx.h Use txrx.h to contain helper function regarding TX/RX. In the coming patches, I will add more RQ helpers. 
Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 ------------------- .../mellanox/mlx5/core/en/reporter_rx.c | 1 + .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 20 +++++++++++++++++++ 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2957edb7e0b7a..c44669102626b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -852,26 +852,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); -static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return mlx5_wq_ll_get_size(&rq->mpwqe.wq); - default: - return mlx5_wq_cyc_get_size(&rq->wqe.wq); - } -} - -static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) -{ - switch (rq->wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return rq->mpwqe.wq.cur_sz; - default: - return rq->wqe.wq.cur_sz; - } -} - bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev); bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index f1edde1ab8bc6..bfdf9c185f02d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -3,6 +3,7 @@ #include "health.h" #include "params.h" +#include "txrx.h" static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index be7692897fc1b..ed9e0b8a6f9e4 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -284,6 +284,26 @@ static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 qn, mlx5_dump_err_cqe(cq->mdev, err_cqe); } +static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_size(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_size(&rq->wqe.wq); + } +} + +static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return rq->mpwqe.wq.cur_sz; + default: + return rq->wqe.wq.cur_sz; + } +} + /* SW parser related functions */ struct mlx5e_swp_spec { From fc42d0de16debb2c8ba83e1345a55a18ff87e2d9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 13:36:26 +0300 Subject: [PATCH 06/12] net/mlx5e: Add helper to get RQ WQE's head Add helper which retrieves the RQ WQE's head. Use this helper in RX reporter diagnose callback. 
Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 5 +---- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/wq.h | 4 ++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index bfdf9c185f02d..f0e639ef4ec5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -181,7 +181,6 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, struct devlink_fmsg *fmsg) { struct mlx5e_priv *priv = rq->channel->priv; - struct mlx5e_params *params; struct mlx5e_icosq *icosq; u8 icosq_hw_state; int wqes_sz; @@ -189,7 +188,6 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, u16 wq_head; int err; - params = &priv->channels.params; icosq = &rq->channel->icosq; err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state); if (err) @@ -200,8 +198,7 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, return err; wqes_sz = mlx5e_rqwq_get_cur_sz(rq); - wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? 
- rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq); + wq_head = mlx5e_rqwq_get_head(rq); err = devlink_fmsg_obj_nest_start(fmsg); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index ed9e0b8a6f9e4..d787e8fc2e991 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -304,6 +304,16 @@ static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq) } } +static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_head(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_head(&rq->wqe.wq); + } +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 4cadc336593f1..27dece35df7e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -290,4 +290,8 @@ static inline void mlx5_wq_ll_update_db_record(struct mlx5_wq_ll *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } +static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq) +{ + return wq->head; +} #endif /* __MLX5_WQ_H__ */ From de6c6ab7e8c6431d67d1a87477fee2cfc0fa0845 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 14:41:13 +0300 Subject: [PATCH 07/12] net/mlx5e: Add helper to get the RQ WQE counter Add a helper which retrieves the RQ's WQE counter. Use this helper in the RX reporter diagnose callback. 
$ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 2113 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1032 HW status: 0 channel ix: 1 rqn: 2118 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1036 HW status: 0 Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/wq.h | 11 +++++++++++ 3 files changed, 27 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index f0e639ef4ec5f..bec804295c52e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -183,6 +183,7 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, struct mlx5e_priv *priv = rq->channel->priv; struct mlx5e_icosq *icosq; u8 icosq_hw_state; + u16 wqe_counter; int wqes_sz; u8 hw_state; u16 wq_head; @@ -199,6 +200,7 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, wqes_sz = mlx5e_rqwq_get_cur_sz(rq); wq_head = mlx5e_rqwq_get_head(rq); + wqe_counter = mlx5e_rqwq_get_wqe_counter(rq); err = devlink_fmsg_obj_nest_start(fmsg); if (err) @@ -220,6 +222,10 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; + err = devlink_fmsg_u32_pair_put(fmsg, "WQE counter", wqe_counter); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index d787e8fc2e991..cf425a60cddcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -314,6 +314,16 @@ static inline u16 mlx5e_rqwq_get_head(struct mlx5e_rq *rq) } } +static inline u16 mlx5e_rqwq_get_wqe_counter(struct mlx5e_rq *rq) +{ + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5_wq_ll_get_counter(&rq->mpwqe.wq); + default: + return mlx5_wq_cyc_get_counter(&rq->wqe.wq); + } +} + /* SW parser related functions */ struct mlx5e_swp_spec { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 27dece35df7e1..e5c4dcd1425e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -172,6 +172,11 @@ static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) return !equal && !smaller; } +static inline u16 mlx5_wq_cyc_get_counter(struct mlx5_wq_cyc *wq) +{ + return wq->wqe_ctr; +} + static inline u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) { return wq->fbc.sz_m1 + 1; @@ -294,4 +299,10 @@ static inline u16 mlx5_wq_ll_get_head(struct mlx5_wq_ll *wq) { return wq->head; } + +static inline u16 mlx5_wq_ll_get_counter(struct mlx5_wq_ll *wq) +{ + return wq->wqe_ctr; +} + #endif /* __MLX5_WQ_H__ */ From d5cbedd7fcb3f3a102d2b485acb5c09ac8085e49 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 18 May 2020 09:21:23 +0300 Subject: [PATCH 08/12] net/mlx5e: Rename reporter's helpers Change prefix to match resident file: %s/mlx5e_reporter_cq_diagnose/mlx5e_health_cq_diag_fmsg %s/mlx5e_reporter_cq_common_diagnose/mlx5e_health_cq_common_diag_fmsg %s/mlx5e_reporter_named_obj_nest_start/mlx5e_health_fmsg_named_obj_nest_start %s/mlx5e_reporter_named_obj_nest_end/mlx5e_health_fmsg_named_obj_nest_end Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/health.c | 20 ++++---- .../ethernet/mellanox/mlx5/core/en/health.h | 8 ++-- .../mellanox/mlx5/core/en/reporter_rx.c | 48 +++++++++---------- .../mellanox/mlx5/core/en/reporter_tx.c | 32 
++++++------- 4 files changed, 54 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index d0625ee923d6d..1b735b54b3aba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -5,7 +5,7 @@ #include "lib/eq.h" #include "lib/mlx5.h" -int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) +int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) { int err; @@ -20,7 +20,7 @@ int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name) return 0; } -int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg) { int err; @@ -35,7 +35,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg) return 0; } -int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) { struct mlx5e_priv *priv = cq->channel->priv; u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {}; @@ -50,7 +50,7 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context); hw_status = MLX5_GET(cqc, cqc, status); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); if (err) return err; @@ -62,14 +62,14 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; return 0; } -int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) { u8 cq_log_stride; u32 cq_sz; @@ -78,7 +78,7 @@ int 
mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * cq_sz = mlx5_cqwq_get_size(&cq->wq); cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); if (err) return err; @@ -90,7 +90,7 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg * if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; @@ -282,7 +282,7 @@ int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl); if (err) return err; @@ -294,7 +294,7 @@ int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 2938553a7606b..6e48518d3d5b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -21,10 +21,10 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv); void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq); int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); -int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); -int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); -int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); -int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg); +int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); 
+int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); +int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg); void mlx5e_reporter_rx_create(struct mlx5e_priv *priv); void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index bec804295c52e..4e1a01d871b7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -238,7 +238,7 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; - err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg); + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); if (err) return err; @@ -268,11 +268,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, rq_sz = mlx5e_rqwq_get_size(generic_rq); rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL)); - err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common config"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common config"); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) goto unlock; @@ -288,15 +288,15 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; - err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&generic_rq->cq, fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; @@ -329,7 +329,7 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 
0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -339,15 +339,15 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "ICOSQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -359,11 +359,11 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); if (err) return err; @@ -374,11 +374,11 @@ static int mlx5e_rx_reporter_dump_icosq(struct mlx5e_priv *priv, struct devlink_ if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, @@ -391,7 +391,7 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); if (err) return err; @@ -401,15 +401,15 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = 
mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RQ"); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -421,11 +421,11 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "receive_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "receive_buff"); if (err) return err; @@ -435,11 +435,11 @@ static int mlx5e_rx_reporter_dump_rq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, @@ -451,7 +451,7 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "RX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX Slice"); if (err) return err; @@ -461,7 +461,7 @@ static int mlx5e_rx_reporter_dump_all_rqs(struct mlx5e_priv *priv, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 6eb2971231d8d..9f712ff2faf9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -166,7 +166,7 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, if (err) return err; - err = mlx5e_reporter_cq_diagnose(&sq->cq, fmsg); + err = mlx5e_health_cq_diag_fmsg(&sq->cq, fmsg); if (err) return err; @@ -195,11 +195,11 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, sq_sz = mlx5_wq_cyc_get_size(&generic_sq->wq); sq_stride = MLX5_SEND_WQE_BB; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "Common Config"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "Common Config"); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) goto unlock; @@ -211,15 +211,15 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, if (err) goto unlock; - err = mlx5e_reporter_cq_common_diagnose(&generic_sq->cq, fmsg); + err = mlx5e_health_cq_common_diag_fmsg(&generic_sq->cq, fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) goto unlock; @@ -257,7 +257,7 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -267,15 +267,15 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SQ"); if (err) return err; - err = 
mlx5e_reporter_named_obj_nest_start(fmsg, "QPC"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "QPC"); if (err) return err; @@ -287,11 +287,11 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "send_buff"); if (err) return err; @@ -301,11 +301,11 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; - return mlx5e_reporter_named_obj_nest_end(fmsg); + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, @@ -317,7 +317,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; - err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice"); + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "SX Slice"); if (err) return err; @@ -327,7 +327,7 @@ static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, if (err) return err; - err = mlx5e_reporter_named_obj_nest_end(fmsg); + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; From 3c9d1699b8575bbb622ce3e8880b9ae41a905969 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 3 May 2020 15:04:15 +0300 Subject: [PATCH 09/12] net/mlx5e: Enhance CQ data on diagnose output Add CQ's consumer index and size to the CQ's diagnose output retrieved on RX/TX reporter diagnose.
$ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 2413 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1032 HW status: 0 ci: 0 size: 1024 channel ix: 1 rqn: 2418 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1036 HW status: 0 ci: 0 size: 1024 $ devlink health diagnose pci/0000:00:0b.0 reporter tx Common Config: SQ: stride size: 64 size: 1024 CQ: stride size: 64 size: 1024 SQs: channel ix: 0 tc: 0 txq ix: 0 sqn: 2412 HW state: 1 stopped: false cc: 0 pc: 0 CQ: cqn: 1030 HW status: 0 ci: 0 size: 1024 channel ix: 1 tc: 0 txq ix: 1 sqn: 2417 HW state: 1 stopped: false cc: 5 pc: 5 CQ: cqn: 1034 HW status: 0 ci: 5 size: 1024 Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 1b735b54b3aba..4bd46e109dbe7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -62,6 +62,14 @@ int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg) if (err) return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq)); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq)); + if (err) + return err; + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); if (err) return err; From 56837c2ae1e716fd29972a6314cb064947718454 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 30 Apr 2020 18:47:39 +0300 Subject: [PATCH 10/12] net/mlx5e: Add EQ info to TX/RX reporter's diagnose Enhance TX/RX reporter's diagnose to include info about the corresponding EQ. 
$ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 1713 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1032 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 42 vecidx: 1 ci: 93 size: 2048 channel ix: 1 rqn: 1718 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1036 HW status: 0 ci: 0 size: 1024 EQ: eqn: 8 irqn: 43 vecidx: 2 ci: 2 size: 2048 $ devlink health diagnose pci/0000:00:0b.0 reporter tx Common Config: SQ: stride size: 64 size: 1024 CQ: stride size: 64 size: 1024 SQs: channel ix: 0 tc: 0 txq ix: 0 sqn: 1712 HW state: 1 stopped: false cc: 91 pc: 91 CQ: cqn: 1030 HW status: 0 ci: 91 size: 1024 EQ: eqn: 7 irqn: 42 vecidx: 1 ci: 93 size: 2048 channel ix: 1 tc: 0 txq ix: 1 sqn: 1717 HW state: 1 stopped: false cc: 0 pc: 0 CQ: cqn: 1034 HW status: 0 ci: 0 size: 1024 EQ: eqn: 8 irqn: 43 vecidx: 2 ci: 2 size: 2048 Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/health.c | 31 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en/health.h | 1 + .../mellanox/mlx5/core/en/reporter_rx.c | 4 +++ .../mellanox/mlx5/core/en/reporter_tx.c | 4 +++ 4 files changed, 40 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 4bd46e109dbe7..3dc200bcfabde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -105,6 +105,37 @@ int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *f return 0; } +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ"); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn); + if (err) + 
return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", eq->core.nent); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + void mlx5e_health_create_reporters(struct mlx5e_priv *priv) { mlx5e_reporter_tx_create(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index 6e48518d3d5b8..b9aadddfd0006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -23,6 +23,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq); int mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); int mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg); +int mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg); int mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name); int mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 4e1a01d871b7b..5f7fba74cfd40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -242,6 +242,10 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; + err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 
9f712ff2faf9e..465c7cc8d909a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -170,6 +170,10 @@ mlx5e_tx_reporter_build_diagnose_output(struct devlink_fmsg *fmsg, if (err) return err; + err = mlx5e_health_eq_diag_fmsg(sq->cq.mcq.eq, fmsg); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); if (err) return err; From b84921129bc85bea22551e98def8e70e2a44c043 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 3 May 2020 17:02:41 +0300 Subject: [PATCH 11/12] net/mlx5e: Enhance ICOSQ data on RX reporter's diagnose When the RQ is in striding RQ mode, it uses the ICOSQ as a helper queue. In this mode, RX reporter dumps more info about the ICOSQ and its related CQ. $ devlink health diagnose pci/0000:00:0b.0 reporter rx Common config: RQ: type: 2 stride size: 2048 size: 8 CQ: stride size: 64 size: 1024 RQs: channel ix: 0 rqn: 2413 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 CQ: cqn: 1032 HW status: 0 ci: 0 size: 1024 EQ: eqn: 7 irqn: 42 vecidx: 1 ci: 93 size: 2048 ICOSQ: sqn: 2411 HW state: 1 cc: 74 pc: 74 WQE size: 128 CQ: cqn: 1029 cc: 8 size: 128 channel ix: 1 rqn: 2418 HW state: 1 SW state: 5 WQE counter: 7 posted WQEs: 7 cc: 7 CQ: cqn: 1036 HW status: 0 ci: 0 size: 1024 EQ: eqn: 8 irqn: 43 vecidx: 2 ci: 2 size: 2048 ICOSQ: sqn: 2416 HW state: 1 cc: 74 pc: 74 WQE size: 128 CQ: cqn: 1033 cc: 8 size: 128 Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/reporter_rx.c | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 5f7fba74cfd40..32ed1067e6dc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -177,6 +177,59 @@ static int mlx5e_rx_reporter_recover(struct devlink_health_reporter 
*reporter, mlx5e_health_recover_channels(priv); } +static int mlx5e_reporter_icosq_diagnose(struct mlx5e_icosq *icosq, u8 hw_state, + struct devlink_fmsg *fmsg) +{ + int err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "ICOSQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "sqn", icosq->sqn); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "pc", icosq->pc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "WQE size", + mlx5_wq_cyc_get_size(&icosq->wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ"); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cqn", icosq->cq.mcq.cqn); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "cc", icosq->cq.wq.cc); + if (err) + return err; + + err = devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&icosq->cq.wq)); + if (err) + return err; + + err = mlx5e_health_fmsg_named_obj_nest_end(fmsg); + if (err) + return err; + + return mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, struct devlink_fmsg *fmsg) { @@ -234,15 +287,15 @@ static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq, if (err) return err; - err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state); + err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); if (err) return err; - err = mlx5e_health_cq_diag_fmsg(&rq->cq, fmsg); + err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); if (err) return err; - err = mlx5e_health_eq_diag_fmsg(rq->cq.mcq.eq, fmsg); + err = mlx5e_reporter_icosq_diagnose(icosq, icosq_hw_state, fmsg); if (err) return err; From e62055642797a6de80f3576c18e212cbbf5b4361 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 18 May 2020 
12:31:38 +0300 Subject: [PATCH 12/12] net/mlx5e: Enhance TX timeout recovery When handling a TX timeout, if the TX reporter was not able to recover from the error, reopen the channels. Once a channel reopen has been attempted, stop looping over the remaining TX queues to check for timeouts. With that, the reporters' state and separation will better expose the driver's state. Signed-off-by: Aya Levin Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/reporter_tx.c | 36 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_main.c | 14 ++------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 465c7cc8d909a..8265843802163 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -83,17 +83,40 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) return err; } +struct mlx5e_tx_timeout_ctx { + struct mlx5e_txqsq *sq; + signed int status; +}; + static int mlx5e_tx_reporter_timeout_recover(void *ctx) { + struct mlx5e_tx_timeout_ctx *to_ctx; + struct mlx5e_priv *priv; struct mlx5_eq_comp *eq; struct mlx5e_txqsq *sq; int err; - sq = ctx; + to_ctx = ctx; + sq = to_ctx->sq; eq = sq->cq.mcq.eq; + priv = sq->channel->priv; err = mlx5e_health_channel_eq_recover(eq, sq->channel); - if (err) - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + if (!err) { + to_ctx->status = 0; /* this sq recovered */ + return err; + } + + err = mlx5e_safe_reopen_channels(priv); + if (!err) { + to_ctx->status = 1; /* all channels recovered */ + return err; + } + + to_ctx->status = err; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_err(priv->netdev, + "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", + err); return err; } @@ -389,9 +412,11 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) { struct mlx5e_priv *priv = sq->channel->priv; char
err_str[MLX5E_REPORTER_PER_Q_MAX_LEN]; + struct mlx5e_tx_timeout_ctx to_ctx = {}; struct mlx5e_err_ctx err_ctx = {}; - err_ctx.ctx = sq; + to_ctx.sq = sq; + err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; err_ctx.dump = mlx5e_tx_reporter_dump_sq; snprintf(err_str, sizeof(err_str), @@ -399,7 +424,8 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, jiffies_to_usecs(jiffies - sq->txq->trans_start)); - return mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); + return to_ctx.status; } static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 046cfb0ea1808..b04c8572adea7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4367,8 +4367,6 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, tx_timeout_work); - bool report_failed = false; - int err; int i; rtnl_lock(); @@ -4386,18 +4384,10 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) continue; if (mlx5e_reporter_tx_timeout(sq)) - report_failed = true; + /* break if tried to reopened channels */ + break; } - if (!report_failed) - goto unlock; - - err = mlx5e_safe_reopen_channels(priv); - if (err) - netdev_err(priv->netdev, - "mlx5e_safe_reopen_channels failed recovering from a tx_timeout, err(%d).\n", - err); - unlock: mutex_unlock(&priv->state_lock); rtnl_unlock();