From b8db67d4df0022e3595263b542953c251f4ccf06 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 30 Sep 2024 22:13:04 +0200 Subject: [PATCH 1/4] qed: make 'ethtool -d' 10 times faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a side effect of commit 5401c3e09928 ("qed: allow sleep in qed_mcp_trace_dump()"), 'ethtool -d' became much slower. Almost all the time is spent collecting the "mcp_trace". It is caused by sleeping too long in _qed_mcp_cmd_and_union. When called with sleeping not allowed, the function delays for 10 µs between firmware polls. But if sleeping is allowed, it sleeps for 10 ms instead. The sleeps in _qed_mcp_cmd_and_union are unnecessarily long. Replace msleep with usleep_range, which allows to achieve a similar polling interval like in the no-sleeping mode (10 - 20 µs). The only caller, qed_mcp_cmd_and_union, can stop doing the multiplication/division of the usecs/max_retries. The polling interval and the number of retries do not need to be parameters at all. On my test system, 'ethtool -d' now takes 4 seconds instead of 44. Signed-off-by: Michal Schmidt Link: https://patch.msgid.link/20240930201307.330692-2-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 36 ++++++++++------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 16e6bd4661433..00f0abc1c2d26 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -459,12 +459,11 @@ static void qed_mcp_print_cpu_info(struct qed_hwfn *p_hwfn, static int _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct qed_mcp_mb_params *p_mb_params, - u32 max_retries, u32 usecs) + struct qed_mcp_mb_params *p_mb_params) { - u32 cnt = 0, msecs = DIV_ROUND_UP(usecs, 1000); struct qed_mcp_cmd_elem *p_cmd_elem; u16 seq_num; + u32 cnt = 0; int rc = 0; /* Wait until the mailbox is non-occupied */ @@ -488,12 +487,13 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) - msleep(msecs); + usleep_range(QED_MCP_RESP_ITER_US, + QED_MCP_RESP_ITER_US * 2); else - udelay(usecs); - } while (++cnt < max_retries); + udelay(QED_MCP_RESP_ITER_US); + } while (++cnt < QED_DRV_MB_MAX_RETRIES); - if (cnt >= max_retries) { + if (cnt >= QED_DRV_MB_MAX_RETRIES) { DP_NOTICE(p_hwfn, "The MFW mailbox is occupied by an uncompleted command. Failed to send command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); @@ -520,9 +520,10 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, */ if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) - msleep(msecs); + usleep_range(QED_MCP_RESP_ITER_US, + QED_MCP_RESP_ITER_US * 2); else - udelay(usecs); + udelay(QED_MCP_RESP_ITER_US); spin_lock_bh(&p_hwfn->mcp_info->cmd_lock); @@ -536,9 +537,9 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, goto err; spin_unlock_bh(&p_hwfn->mcp_info->cmd_lock); - } while (++cnt < max_retries); + } while (++cnt < QED_DRV_MB_MAX_RETRIES); - if (cnt >= max_retries) { + if (cnt >= QED_DRV_MB_MAX_RETRIES) { DP_NOTICE(p_hwfn, "The MFW failed to respond to command 0x%08x [param 0x%08x].\n", p_mb_params->cmd, p_mb_params->param); @@ -564,7 +565,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, "MFW mailbox: response 0x%08x param 0x%08x [after %d.%03d ms]\n", p_mb_params->mcp_resp, p_mb_params->mcp_param, - (cnt * usecs) / 1000, (cnt * usecs) % 1000); + (cnt * QED_MCP_RESP_ITER_US) / 1000, + (cnt * QED_MCP_RESP_ITER_US) % 1000); /* Clear the sequence number from the MFW response */ p_mb_params->mcp_resp &= FW_MSG_CODE_MASK; @@ -581,8 +583,6 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, struct qed_mcp_mb_params *p_mb_params) { size_t union_data_size = sizeof(union drv_union_data); - u32 max_retries = QED_DRV_MB_MAX_RETRIES; - u32 usecs = QED_MCP_RESP_ITER_US; /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { @@ -606,13 +606,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, return -EINVAL; } - if (QED_MB_FLAGS_IS_SET(p_mb_params, CAN_SLEEP)) { - max_retries = DIV_ROUND_UP(max_retries, 1000); - usecs *= 1000; - } - - return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params, max_retries, - usecs); + return _qed_mcp_cmd_and_union(p_hwfn, p_ptt, p_mb_params); } static int _qed_mcp_cmd(struct qed_hwfn *p_hwfn, From 6cd695706f8bb3d06a4dda1f7d673dbfcca45784 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 30 Sep 2024 22:13:05 +0200 Subject: [PATCH 2/4] qed: put cond_resched() in qed_grc_dump_ctx_data() On a kernel with preemption none or voluntary, 'ethtool -d' on a qede network device can cause a big latency spike. The biggest part of it is the loop in qed_grc_dump_ctx_data. The function is called only from the .get_size and .perform_dump callbacks for the "grc" feature defined in qed_features_lookup[]. As far as I can see, they are used in: - qed's devlink healh reporter .dump op - qede's ethtool get_regs/get_regs_len/get_dump_data ops - qedf's qedf_get_grc_dump, called from: - qedf_sysfs_write_grcdump - "grcdump" sysfs attribute write - qedf_wq_grcdump - a workqueue It is safe to sleep in all of them. Let's insert a cond_resched() in the outer loop to let other tasks run. Measured using this script: #!/bin/bash DEV=ens3f1 echo wakeup_rt > /sys/kernel/tracing/current_tracer echo 0 > /sys/kernel/tracing/tracing_max_latency echo 1 > /sys/kernel/tracing/tracing_on echo "Setting the task CPU affinity" taskset -p 1 $$ > /dev/null echo "Starting the real-time task" chrt -f 50 bash -c 'while sleep 0.01; do :; done' & sleep 1 echo "Running: ethtool -d $DEV" time ethtool -d $DEV > /dev/null kill %1 echo 0 > /sys/kernel/tracing/tracing_on echo "Measured latency: $( Link: https://patch.msgid.link/20240930201307.330692-3-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index f67be4b8ad435..464a72afb7589 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -2873,6 +2873,7 @@ static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, false, SPLIT_TYPE_NONE, 0); } + cond_resched(); } return offset; From cf54ae6b59203bea4f4c749043fa57a58d279e38 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 30 Sep 2024 22:13:06 +0200 Subject: [PATCH 3/4] qed: allow the callee of qed_mcp_nvm_read() to sleep qed_mcp_nvm_read has a loop where it calls qed_mcp_nvm_rd_cmd with the argument b_can_sleep=false. And it sleeps once every 0x1000 bytes read. Simplify this by letting qed_mcp_nvm_rd_cmd itself sleep (b_can_sleep=true). It will have slept at least once when successful (in the "Wait for the MFW response" loop). So the extra sleep once every 0x1000 bytes becomes superfluous. Delete it. On my test system with voluntary preemption, this lowers the latency caused by 'ethtool -d' from 53 ms to 10 ms. Signed-off-by: Michal Schmidt Link: https://patch.msgid.link/20240930201307.330692-4-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_mcp.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 00f0abc1c2d26..26a714bfad4ec 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3079,20 +3079,13 @@ int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len) DRV_MB_PARAM_NVM_LEN_OFFSET), &resp, &resp_param, &read_len, - (u32 *)(p_buf + offset), false); + (u32 *)(p_buf + offset), true); if (rc || (resp != FW_MSG_CODE_NVM_OK)) { DP_NOTICE(cdev, "MCP command rc = %d\n", rc); break; } - /* This can be a lengthy process, and it's possible scheduler - * isn't preemptible. Sleep a bit to prevent CPU hogging. - */ - if (bytes_left % 0x1000 < - (bytes_left - read_len) % 0x1000) - usleep_range(1000, 2000); - offset += read_len; bytes_left -= read_len; } From 2efeaf1d2a13f4b7419d60cd145ac84a3c151214 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 30 Sep 2024 22:13:07 +0200 Subject: [PATCH 4/4] qed: put cond_resched() in qed_dmae_operation_wait() It is OK to sleep in qed_dmae_operation_wait, because it is called only in process context, while holding p_hwfn->dmae_info.mutex from one of the qed_dmae_{host,grc}2{host,grc} functions. The udelay(DMAE_MIN_WAIT_TIME=2) in the function is too short to replace with usleep_range, but at least it's a suitable point for checking if we should give up the CPU with cond_resched(). This lowers the latency caused by 'ethtool -d' from 10 ms to less than 2 ms on my test system with voluntary preemption. Signed-off-by: Michal Schmidt Link: https://patch.msgid.link/20240930201307.330692-5-mschmidt@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qed/qed_hw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 6263f847b6b92..9e5f0dbc8a076 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -596,6 +596,7 @@ static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn) barrier(); while (*p_hwfn->dmae_info.p_completion_word != DMAE_COMPLETION_VAL) { udelay(DMAE_MIN_WAIT_TIME); + cond_resched(); if (++wait_cnt > wait_cnt_limit) { DP_NOTICE(p_hwfn->cdev, "Timed-out waiting for operation to complete. Completion word is 0x%08x expected 0x%08x.\n",