Skip to content

Commit

Permalink
RDMA/hns: Fix mbx timing out before CMD execution is completed
Browse files Browse the repository at this point in the history
When a large number of tasks are issued, the hardware processes mailbox
(mbx) commands more slowly. The firmware currently treats an mbx command
as timed out after 30ms, and the driver currently uses the same 30ms
timeout.

Because firmware scheduling in multi-function scenarios also takes a
certain amount of time, the driver can time out too early and report a
failure before the firmware's own mbx timeout has elapsed.

This patch introduces a new mechanism that can set different timeouts for
different cmds and extends the timeout of mbx to 35ms.

Fixes: a04ff73 ("RDMA/hns: Add command queue support for hip08 RoCE driver")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://lore.kernel.org/r/20240710133705.896445-9-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
  • Loading branch information
Chengchang Tang authored and Leon Romanovsky committed Jul 11, 2024
1 parent 0b8e658 commit bbddfa2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 7 deletions.
35 changes: 28 additions & 7 deletions drivers/infiniband/hw/hns/hns_roce_hw_v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -1275,12 +1275,38 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
return -EIO;
}

/*
 * Select the wait limit to apply to a command-queue request.
 *
 * @opcode:     opcode of the command being issued
 * @tx_timeout: limit to fall back on when @opcode has no dedicated entry
 *
 * A small static table maps opcodes to their own limits. The first entry
 * whose opcode matches wins; if nothing matches, @tx_timeout is returned
 * unchanged.
 */
static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
{
	static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
		{ .opcode = HNS_ROCE_OPC_POST_MB,
		  .tx_timeout = HNS_ROCE_OPC_POST_MB_TIMEOUT },
	};
	const struct hns_roce_cmdq_tx_timeout_map *entry = cmdq_tx_timeout;
	const struct hns_roce_cmdq_tx_timeout_map *end =
		cmdq_tx_timeout + ARRAY_SIZE(cmdq_tx_timeout);

	while (entry != end) {
		if (entry->opcode == opcode)
			return entry->tx_timeout;
		entry++;
	}

	/* No per-opcode override: keep the caller's default. */
	return tx_timeout;
}

/*
 * Poll the command send queue until it reports completion or the wait
 * limit for this command is used up.
 *
 * @hr_dev: device whose command queue is polled
 * @opcode: opcode of the command being waited on; handed to
 *          hns_roce_cmdq_tx_timeout() together with priv->cmq.tx_timeout
 *          to pick the limit
 *
 * The limit is a count of polls, with udelay(1) between consecutive
 * polls. No status is returned; a caller that needs to tell completion
 * from timeout has to query hns_roce_cmq_csq_done() itself.
 */
static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
{
	struct hns_roce_v2_priv *priv = hr_dev->priv;
	u32 limit = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
	u32 polls = 0;

	for (;;) {
		if (hns_roce_cmq_csq_done(hr_dev))
			return;

		udelay(1);

		/*
		 * The limit is tested only after a poll and delay, so the
		 * queue is always checked at least once.
		 */
		if (++polls >= limit)
			return;
	}
}

static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
u32 timeout = 0;
u16 desc_ret;
u32 tail;
int ret;
Expand All @@ -1301,12 +1327,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,

atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);

do {
if (hns_roce_cmq_csq_done(hr_dev))
break;
udelay(1);
} while (++timeout < priv->cmq.tx_timeout);

hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
if (hns_roce_cmq_csq_done(hr_dev)) {
ret = 0;
for (i = 0; i < num; i++) {
Expand Down
6 changes: 6 additions & 0 deletions drivers/infiniband/hw/hns/hns_roce_hw_v2.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,12 @@ enum hns_roce_opcode_type {
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};

/*
 * Wait limit applied to HNS_ROCE_OPC_POST_MB commands. The command-queue
 * wait loop delays with udelay(1) between polls, so 35000 polls is roughly
 * 35 ms.
 */
#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
/* One entry of the per-opcode wait-limit table: a command and its limit. */
struct hns_roce_cmdq_tx_timeout_map {
/* Command opcode this entry applies to. */
u16 opcode;
/* Wait limit used for that opcode in place of the queue-wide default. */
u32 tx_timeout;
};

enum {
TYPE_CRQ,
TYPE_CSQ,
Expand Down

0 comments on commit bbddfa2

Please sign in to comment.