Skip to content

Commit

Permalink
RDMA/mlx5: Change debug log level for remote access error syndromes
Browse files Browse the repository at this point in the history
The mlx5 driver dumps the entire CQE buffer by default for few syndromes.
Some syndromes are expected due to the application behavior [ex:
MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and
MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. Hence, for these syndromes, the patch
converts the log level from KERN_WARNING to KERN_DEBUG. This enables the
application to get the CQE buffer dump by changing to KERN_DEBUG level
as and when needed.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Arumugam Kolappan <aru.kolappan@oracle.com>
Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
  • Loading branch information
Arumugam Kolappan authored and Leon Romanovsky committed Nov 6, 2022
1 parent 692373d commit abef378
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 12 deletions.
27 changes: 15 additions & 12 deletions drivers/infiniband/hw/mlx5/cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
struct ib_wc *wc, const char *level)
{
mlx5_ib_warn(dev, "dump error cqe\n");
mlx5_dump_err_cqe(dev->mdev, cqe);
mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
cqe, sizeof(*cqe), false);
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
struct mlx5_err_cqe *cqe,
struct ib_wc *wc)
{
int dump = 1;
const char *dump = KERN_WARNING;

switch (cqe->syndrome) {
case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
Expand All @@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_LOC_QP_OP_ERR;
break;
case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_LOC_PROT_ERR;
break;
case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
dump = 0;
dump = NULL;
wc->status = IB_WC_WR_FLUSH_ERR;
break;
case MLX5_CQE_SYNDROME_MW_BIND_ERR:
Expand All @@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
wc->status = IB_WC_REM_INV_REQ_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_ACCESS_ERR;
break;
case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
dump = KERN_DEBUG;
wc->status = IB_WC_REM_OP_ERR;
break;
case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
dump = NULL;
wc->status = IB_WC_RNR_RETRY_EXC_ERR;
dump = 0;
break;
case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
wc->status = IB_WC_REM_ABORT_ERR;
Expand All @@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
}

wc->vendor_err = cqe->vendor_err_synd;
if (dump) {
mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
ib_wc_status_msg(wc->status));
dump_cqe(dev, cqe);
}
if (dump)
dump_cqe(dev, cqe, wc, dump);
}

static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
Expand Down
4 changes: 4 additions & 0 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@
dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)

#define mlx5_ib_log(lvl, _dev, format, arg...) \
dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \
__func__, __LINE__, current->pid, ##arg)

#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)

Expand Down

0 comments on commit abef378

Please sign in to comment.