Skip to content

Commit

Permalink
RDMA/bnxt_re: expose detailed stats retrieved from HW
Browse files Browse the repository at this point in the history
Broadcom's adapter supports more granular statistics
that give a better understanding of the state of the
chip while data traffic is flowing.

Expose the detailed stats to the consumer through
the standard hook available in the kverbs interface.
In order to retrieve all the information, the driver
implements a firmware command.

Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
  • Loading branch information
Selvin Xavier authored and Doug Ledford committed Jan 18, 2018
1 parent 872f357 commit 89f8100
Show file tree
Hide file tree
Showing 7 changed files with 417 additions and 10 deletions.
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/bnxt_re/bnxt_re.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
#define BNXT_RE_FLAG_TASK_IN_PROG 6
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_en_dev *en_dev;
Expand Down Expand Up @@ -168,6 +169,7 @@ struct bnxt_re_dev {
atomic_t nq_alloc_cnt;
u32 is_virtfn;
u32 num_vfs;
struct bnxt_qplib_roce_stats stats;
};

#define to_bnxt_re_dev(ptr, member) \
Expand Down
145 changes: 135 additions & 10 deletions drivers/infiniband/hw/bnxt_re/hw_counters.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,55 @@
#include "hw_counters.h"

/*
 * Counter name strings. Each entry is placed with a designator taken from
 * enum bnxt_re_hw_stats, and bnxt_re_ib_get_hw_stats() returns
 * ARRAY_SIZE() of this table.
 *
 * NOTE(review): "missin_resp" looks like a misspelling of "missing_resp".
 * The string is presumably visible to consumers of the counters, so
 * confirm before renaming it.
 */
static const char * const bnxt_re_stat_name[] = {
[BNXT_RE_ACTIVE_QP] = "active_qps",
[BNXT_RE_ACTIVE_SRQ] = "active_srqs",
[BNXT_RE_ACTIVE_CQ] = "active_cqs",
[BNXT_RE_ACTIVE_MR] = "active_mrs",
[BNXT_RE_ACTIVE_MW] = "active_mws",
[BNXT_RE_RX_PKTS] = "rx_pkts",
[BNXT_RE_RX_BYTES] = "rx_bytes",
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors"
[BNXT_RE_ACTIVE_QP] = "active_qps",
[BNXT_RE_ACTIVE_SRQ] = "active_srqs",
[BNXT_RE_ACTIVE_CQ] = "active_cqs",
[BNXT_RE_ACTIVE_MR] = "active_mrs",
[BNXT_RE_ACTIVE_MW] = "active_mws",
[BNXT_RE_RX_PKTS] = "rx_pkts",
[BNXT_RE_RX_BYTES] = "rx_bytes",
[BNXT_RE_TX_PKTS] = "tx_pkts",
[BNXT_RE_TX_BYTES] = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS] = "recoverable_errors",
[BNXT_RE_TO_RETRANSMITS] = "to_retransmits",
[BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd",
[BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded",
[BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd",
[BNXT_RE_MISSING_RESP] = "missin_resp",
[BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err",
[BNXT_RE_BAD_RESP_ERR] = "bad_resp_err",
[BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err",
[BNXT_RE_LOCAL_PROTECTION_ERR] = "local_protection_err",
[BNXT_RE_MEM_MGMT_OP_ERR] = "mem_mgmt_op_err",
[BNXT_RE_REMOTE_INVALID_REQ_ERR] = "remote_invalid_req_err",
[BNXT_RE_REMOTE_ACCESS_ERR] = "remote_access_err",
[BNXT_RE_REMOTE_OP_ERR] = "remote_op_err",
[BNXT_RE_DUP_REQ] = "dup_req",
[BNXT_RE_RES_EXCEED_MAX] = "res_exceed_max",
[BNXT_RE_RES_LENGTH_MISMATCH] = "res_length_mismatch",
[BNXT_RE_RES_EXCEEDS_WQE] = "res_exceeds_wqe",
[BNXT_RE_RES_OPCODE_ERR] = "res_opcode_err",
[BNXT_RE_RES_RX_INVALID_RKEY] = "res_rx_invalid_rkey",
[BNXT_RE_RES_RX_DOMAIN_ERR] = "res_rx_domain_err",
[BNXT_RE_RES_RX_NO_PERM] = "res_rx_no_perm",
[BNXT_RE_RES_RX_RANGE_ERR] = "res_rx_range_err",
[BNXT_RE_RES_TX_INVALID_RKEY] = "res_tx_invalid_rkey",
[BNXT_RE_RES_TX_DOMAIN_ERR] = "res_tx_domain_err",
[BNXT_RE_RES_TX_NO_PERM] = "res_tx_no_perm",
[BNXT_RE_RES_TX_RANGE_ERR] = "res_tx_range_err",
[BNXT_RE_RES_IRRQ_OFLOW] = "res_irrq_oflow",
[BNXT_RE_RES_UNSUP_OPCODE] = "res_unsup_opcode",
[BNXT_RE_RES_UNALIGNED_ATOMIC] = "res_unaligned_atomic",
[BNXT_RE_RES_REM_INV_ERR] = "res_rem_inv_err",
[BNXT_RE_RES_MEM_ERROR] = "res_mem_err",
[BNXT_RE_RES_SRQ_ERR] = "res_srq_err",
[BNXT_RE_RES_CMP_ERR] = "res_cmp_err",
[BNXT_RE_RES_INVALID_DUP_RKEY] = "res_invalid_dup_rkey",
[BNXT_RE_RES_WQE_FORMAT_ERR] = "res_wqe_format_err",
[BNXT_RE_RES_CQ_LOAD_ERR] = "res_cq_load_err",
[BNXT_RE_RES_SRQ_LOAD_ERR] = "res_srq_load_err",
[BNXT_RE_RES_TX_PCI_ERR] = "res_tx_pci_err",
[BNXT_RE_RES_RX_PCI_ERR] = "res_rx_pci_err"
};

int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
Expand All @@ -76,6 +115,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
struct ctx_hw_stats *bnxt_re_stats = rdev->qplib_ctx.stats.dma;
int rc = 0;

if (!port || !stats)
return -EINVAL;
Expand All @@ -97,6 +137,91 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
stats->value[BNXT_RE_TX_BYTES] =
le64_to_cpu(bnxt_re_stats->tx_ucast_bytes);
}
if (test_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags)) {
rc = bnxt_qplib_get_roce_stats(&rdev->rcfw, &rdev->stats);
if (rc)
clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS,
&rdev->flags);
stats->value[BNXT_RE_TO_RETRANSMITS] =
rdev->stats.to_retransmits;
stats->value[BNXT_RE_SEQ_ERR_NAKS_RCVD] =
rdev->stats.seq_err_naks_rcvd;
stats->value[BNXT_RE_MAX_RETRY_EXCEEDED] =
rdev->stats.max_retry_exceeded;
stats->value[BNXT_RE_RNR_NAKS_RCVD] =
rdev->stats.rnr_naks_rcvd;
stats->value[BNXT_RE_MISSING_RESP] =
rdev->stats.missing_resp;
stats->value[BNXT_RE_UNRECOVERABLE_ERR] =
rdev->stats.unrecoverable_err;
stats->value[BNXT_RE_BAD_RESP_ERR] =
rdev->stats.bad_resp_err;
stats->value[BNXT_RE_LOCAL_QP_OP_ERR] =
rdev->stats.local_qp_op_err;
stats->value[BNXT_RE_LOCAL_PROTECTION_ERR] =
rdev->stats.local_protection_err;
stats->value[BNXT_RE_MEM_MGMT_OP_ERR] =
rdev->stats.mem_mgmt_op_err;
stats->value[BNXT_RE_REMOTE_INVALID_REQ_ERR] =
rdev->stats.remote_invalid_req_err;
stats->value[BNXT_RE_REMOTE_ACCESS_ERR] =
rdev->stats.remote_access_err;
stats->value[BNXT_RE_REMOTE_OP_ERR] =
rdev->stats.remote_op_err;
stats->value[BNXT_RE_DUP_REQ] =
rdev->stats.dup_req;
stats->value[BNXT_RE_RES_EXCEED_MAX] =
rdev->stats.res_exceed_max;
stats->value[BNXT_RE_RES_LENGTH_MISMATCH] =
rdev->stats.res_length_mismatch;
stats->value[BNXT_RE_RES_EXCEEDS_WQE] =
rdev->stats.res_exceeds_wqe;
stats->value[BNXT_RE_RES_OPCODE_ERR] =
rdev->stats.res_opcode_err;
stats->value[BNXT_RE_RES_RX_INVALID_RKEY] =
rdev->stats.res_rx_invalid_rkey;
stats->value[BNXT_RE_RES_RX_DOMAIN_ERR] =
rdev->stats.res_rx_domain_err;
stats->value[BNXT_RE_RES_RX_NO_PERM] =
rdev->stats.res_rx_no_perm;
stats->value[BNXT_RE_RES_RX_RANGE_ERR] =
rdev->stats.res_rx_range_err;
stats->value[BNXT_RE_RES_TX_INVALID_RKEY] =
rdev->stats.res_tx_invalid_rkey;
stats->value[BNXT_RE_RES_TX_DOMAIN_ERR] =
rdev->stats.res_tx_domain_err;
stats->value[BNXT_RE_RES_TX_NO_PERM] =
rdev->stats.res_tx_no_perm;
stats->value[BNXT_RE_RES_TX_RANGE_ERR] =
rdev->stats.res_tx_range_err;
stats->value[BNXT_RE_RES_IRRQ_OFLOW] =
rdev->stats.res_irrq_oflow;
stats->value[BNXT_RE_RES_UNSUP_OPCODE] =
rdev->stats.res_unsup_opcode;
stats->value[BNXT_RE_RES_UNALIGNED_ATOMIC] =
rdev->stats.res_unaligned_atomic;
stats->value[BNXT_RE_RES_REM_INV_ERR] =
rdev->stats.res_rem_inv_err;
stats->value[BNXT_RE_RES_MEM_ERROR] =
rdev->stats.res_mem_error;
stats->value[BNXT_RE_RES_SRQ_ERR] =
rdev->stats.res_srq_err;
stats->value[BNXT_RE_RES_CMP_ERR] =
rdev->stats.res_cmp_err;
stats->value[BNXT_RE_RES_INVALID_DUP_RKEY] =
rdev->stats.res_invalid_dup_rkey;
stats->value[BNXT_RE_RES_WQE_FORMAT_ERR] =
rdev->stats.res_wqe_format_err;
stats->value[BNXT_RE_RES_CQ_LOAD_ERR] =
rdev->stats.res_cq_load_err;
stats->value[BNXT_RE_RES_SRQ_LOAD_ERR] =
rdev->stats.res_srq_load_err;
stats->value[BNXT_RE_RES_TX_PCI_ERR] =
rdev->stats.res_tx_pci_err;
stats->value[BNXT_RE_RES_RX_PCI_ERR] =
rdev->stats.res_rx_pci_err;
}

return ARRAY_SIZE(bnxt_re_stat_name);
}

Expand Down
39 changes: 39 additions & 0 deletions drivers/infiniband/hw/bnxt_re/hw_counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,45 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
BNXT_RE_TO_RETRANSMITS,
BNXT_RE_SEQ_ERR_NAKS_RCVD,
BNXT_RE_MAX_RETRY_EXCEEDED,
BNXT_RE_RNR_NAKS_RCVD,
BNXT_RE_MISSING_RESP,
BNXT_RE_UNRECOVERABLE_ERR,
BNXT_RE_BAD_RESP_ERR,
BNXT_RE_LOCAL_QP_OP_ERR,
BNXT_RE_LOCAL_PROTECTION_ERR,
BNXT_RE_MEM_MGMT_OP_ERR,
BNXT_RE_REMOTE_INVALID_REQ_ERR,
BNXT_RE_REMOTE_ACCESS_ERR,
BNXT_RE_REMOTE_OP_ERR,
BNXT_RE_DUP_REQ,
BNXT_RE_RES_EXCEED_MAX,
BNXT_RE_RES_LENGTH_MISMATCH,
BNXT_RE_RES_EXCEEDS_WQE,
BNXT_RE_RES_OPCODE_ERR,
BNXT_RE_RES_RX_INVALID_RKEY,
BNXT_RE_RES_RX_DOMAIN_ERR,
BNXT_RE_RES_RX_NO_PERM,
BNXT_RE_RES_RX_RANGE_ERR,
BNXT_RE_RES_TX_INVALID_RKEY,
BNXT_RE_RES_TX_DOMAIN_ERR,
BNXT_RE_RES_TX_NO_PERM,
BNXT_RE_RES_TX_RANGE_ERR,
BNXT_RE_RES_IRRQ_OFLOW,
BNXT_RE_RES_UNSUP_OPCODE,
BNXT_RE_RES_UNALIGNED_ATOMIC,
BNXT_RE_RES_REM_INV_ERR,
BNXT_RE_RES_MEM_ERROR,
BNXT_RE_RES_SRQ_ERR,
BNXT_RE_RES_CMP_ERR,
BNXT_RE_RES_INVALID_DUP_RKEY,
BNXT_RE_RES_WQE_FORMAT_ERR,
BNXT_RE_RES_CQ_LOAD_ERR,
BNXT_RE_RES_SRQ_LOAD_ERR,
BNXT_RE_RES_TX_PCI_ERR,
BNXT_RE_RES_RX_PCI_ERR,
BNXT_RE_NUM_COUNTERS
};

Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/hw/bnxt_re/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1245,6 +1245,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
ib_get_eth_speed(&rdev->ibdev, 1, &rdev->active_speed,
&rdev->active_width);
set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);

Expand Down
70 changes: 70 additions & 0 deletions drivers/infiniband/hw/bnxt_re/qplib_sp.c
Original file line number Diff line number Diff line change
Expand Up @@ -790,3 +790,73 @@ int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
0);
return 0;
}

/**
 * bnxt_qplib_get_roce_stats - query the RoCE statistics from firmware
 * @rcfw: RCFW channel used to allocate the side buffer and send the command
 * @stats: destination for the counters; written only on success
 *
 * Sends a QUERY_ROCE_STATS command with a side buffer sized for
 * struct creq_query_roce_stats_resp_sb, then copies every little-endian
 * 64-bit counter from the side buffer into @stats in CPU byte order.
 *
 * Return: 0 on success, -ENOMEM if the side buffer cannot be allocated,
 * otherwise the non-zero value returned by bnxt_qplib_rcfw_send_message().
 * @stats is left untouched on failure.
 */
int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
			      struct bnxt_qplib_roce_stats *stats)
{
	struct cmdq_query_roce_stats req;
	struct creq_query_roce_stats_resp resp;
	struct bnxt_qplib_rcfw_sbuf *sbuf;
	struct creq_query_roce_stats_resp_sb *sb;
	u16 cmd_flags = 0;
	int rc;

	RCFW_CMD_PREP(req, QUERY_ROCE_STATS, cmd_flags);

	sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
	if (!sbuf) {
		/* Kernel log messages must end with a newline. */
		dev_err(&rcfw->pdev->dev,
			"QPLIB: SP: QUERY_ROCE_STATS alloc side buffer failed\n");
		return -ENOMEM;
	}

	sb = sbuf->sb;
	/* The response size is expressed in BNXT_QPLIB_CMDQE_UNITS units. */
	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
	rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
					  (void *)sbuf, 0);
	if (rc)
		goto bail;

	/* Extract the counters from the side buffer */
	stats->to_retransmits = le64_to_cpu(sb->to_retransmits);
	stats->seq_err_naks_rcvd = le64_to_cpu(sb->seq_err_naks_rcvd);
	stats->max_retry_exceeded = le64_to_cpu(sb->max_retry_exceeded);
	stats->rnr_naks_rcvd = le64_to_cpu(sb->rnr_naks_rcvd);
	stats->missing_resp = le64_to_cpu(sb->missing_resp);
	stats->unrecoverable_err = le64_to_cpu(sb->unrecoverable_err);
	stats->bad_resp_err = le64_to_cpu(sb->bad_resp_err);
	stats->local_qp_op_err = le64_to_cpu(sb->local_qp_op_err);
	stats->local_protection_err = le64_to_cpu(sb->local_protection_err);
	stats->mem_mgmt_op_err = le64_to_cpu(sb->mem_mgmt_op_err);
	stats->remote_invalid_req_err = le64_to_cpu(sb->remote_invalid_req_err);
	stats->remote_access_err = le64_to_cpu(sb->remote_access_err);
	stats->remote_op_err = le64_to_cpu(sb->remote_op_err);
	stats->dup_req = le64_to_cpu(sb->dup_req);
	stats->res_exceed_max = le64_to_cpu(sb->res_exceed_max);
	stats->res_length_mismatch = le64_to_cpu(sb->res_length_mismatch);
	stats->res_exceeds_wqe = le64_to_cpu(sb->res_exceeds_wqe);
	stats->res_opcode_err = le64_to_cpu(sb->res_opcode_err);
	stats->res_rx_invalid_rkey = le64_to_cpu(sb->res_rx_invalid_rkey);
	stats->res_rx_domain_err = le64_to_cpu(sb->res_rx_domain_err);
	stats->res_rx_no_perm = le64_to_cpu(sb->res_rx_no_perm);
	stats->res_rx_range_err = le64_to_cpu(sb->res_rx_range_err);
	stats->res_tx_invalid_rkey = le64_to_cpu(sb->res_tx_invalid_rkey);
	stats->res_tx_domain_err = le64_to_cpu(sb->res_tx_domain_err);
	stats->res_tx_no_perm = le64_to_cpu(sb->res_tx_no_perm);
	stats->res_tx_range_err = le64_to_cpu(sb->res_tx_range_err);
	stats->res_irrq_oflow = le64_to_cpu(sb->res_irrq_oflow);
	stats->res_unsup_opcode = le64_to_cpu(sb->res_unsup_opcode);
	stats->res_unaligned_atomic = le64_to_cpu(sb->res_unaligned_atomic);
	stats->res_rem_inv_err = le64_to_cpu(sb->res_rem_inv_err);
	stats->res_mem_error = le64_to_cpu(sb->res_mem_error);
	stats->res_srq_err = le64_to_cpu(sb->res_srq_err);
	stats->res_cmp_err = le64_to_cpu(sb->res_cmp_err);
	stats->res_invalid_dup_rkey = le64_to_cpu(sb->res_invalid_dup_rkey);
	stats->res_wqe_format_err = le64_to_cpu(sb->res_wqe_format_err);
	stats->res_cq_load_err = le64_to_cpu(sb->res_cq_load_err);
	stats->res_srq_load_err = le64_to_cpu(sb->res_srq_load_err);
	stats->res_tx_pci_err = le64_to_cpu(sb->res_tx_pci_err);
	stats->res_rx_pci_err = le64_to_cpu(sb->res_rx_pci_err);
bail:
	/* The side buffer is released on both the success and error paths. */
	bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
	return rc;
}
Loading

0 comments on commit 89f8100

Please sign in to comment.