Skip to content

Commit

Permalink
net/mlx5: Add support for FW fatal reporter dump
Browse files Browse the repository at this point in the history
Add support for a dump callback to the mlx5 FW fatal reporter.
The FW fatal dump uses the cr-dump functionality to gather cr-space data for
debug. The cr-dump uses the vsc interface, which remains valid even when the
FW command interface is not functional, as is the case in most FW fatal
errors.

Command example and output:
$ devlink health dump show pci/0000:82:00.0 reporter fw_fatal
 crdump_data:
  00 20 00 01 00 00 00 00 03 00 00 00 00 00 00 00
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 80
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  00 00 00 00 00 00 00 00 00 00 00 00 ba 82 00 00
  0c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20
  00 00 00 00 00 00 00 00 00 00 00 00 00 00 fa 00
  a4 0e 00 00 00 00 00 00 80 c7 fe ff 50 0a 00 00
...
...

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Moshe Shemesh authored and Saeed Mahameed committed Jun 13, 2019
1 parent 96c82cd commit 9b1f298
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/health.c
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,59 @@ mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
return mlx5_health_try_recover(dev);
}

/* Maximum number of bytes handed to devlink_fmsg_binary_put() per call. */
#define MLX5_CR_DUMP_CHUNK_SIZE 256

/*
 * Dump callback of the FW fatal reporter.
 *
 * Collects the cr-space dump via mlx5_crdump_collect() into a temporary
 * buffer of dev->priv.health.crdump_size bytes and emits it into @fmsg as
 * the "crdump_data" array, MLX5_CR_DUMP_CHUNK_SIZE bytes at a time. When
 * @priv_ctx is non-NULL it is treated as a struct mlx5_fw_reporter_ctx and
 * its pairs are emitted before the dump data.
 *
 * Returns 0 on success, -EPERM when mlx5_core_is_pf() is false for the
 * device, -ENOMEM when the buffer cannot be allocated, or the error
 * returned by the failing helper. The temporary buffer is released on
 * every exit path once it has been allocated.
 */
static int
mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
			    struct devlink_fmsg *fmsg, void *priv_ctx)
{
	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
	u32 crdump_size = dev->priv.health.crdump_size;
	u32 *cr_data;
	u32 data_size;
	u32 offset;
	int err;

	if (!mlx5_core_is_pf(dev))
		return -EPERM;

	cr_data = kvmalloc(crdump_size, GFP_KERNEL);
	if (!cr_data)
		return -ENOMEM;

	err = mlx5_crdump_collect(dev, cr_data);
	if (err)
		goto free_data; /* do not leak the buffer on collect failure */

	if (priv_ctx) {
		struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;

		err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
		if (err)
			goto free_data;
	}

	err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data");
	if (err)
		goto free_data;

	for (offset = 0; offset < crdump_size; offset += data_size) {
		if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE)
			data_size = crdump_size - offset;
		else
			data_size = MLX5_CR_DUMP_CHUNK_SIZE;

		/* offset is in bytes, so advance through the buffer bytewise */
		err = devlink_fmsg_binary_put(fmsg, (char *)cr_data + offset,
					      data_size);
		if (err)
			goto free_data;
	}
	err = devlink_fmsg_arr_pair_nest_end(fmsg);

free_data:
	/* kvmalloc() memory may be vmalloc-backed; kfree() is not valid here */
	kvfree(cr_data);
	return err;
}

/* Callback table for the "fw_fatal" devlink health reporter. */
static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
	.name		= "fw_fatal",
	.dump		= mlx5_fw_fatal_reporter_dump,
	.recover	= mlx5_fw_fatal_reporter_recover,
};

/*
 * Graceful period for the FW reporter. NOTE(review): the unit is not
 * visible in this hunk; presumably milliseconds (i.e. 20 minutes) -
 * confirm against where the reporter is created.
 */
#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000
Expand Down

0 comments on commit 9b1f298

Please sign in to comment.