From a734c43caa4d9a08da521be1a2135cadf1510e75 Mon Sep 17 00:00:00 2001
From: Jacob Keller
Date: Tue, 13 Jun 2023 13:40:53 -0700
Subject: [PATCH 1/6] ice: reduce initial wait for control queue messages

The ice_sq_send_cmd() function is used to send messages to the control
queues used to communicate with firmware, virtual functions, and even some
hardware. When sending a control queue message, the driver is designed to
synchronously wait for a response from the queue. Currently it waits
between checks for 100 to 150 microseconds. Commit f86d6f9c49f6 ("ice:
sleep, don't busy-wait, for ICE_CTL_Q_SQ_CMD_TIMEOUT") recently changed
the behavior from an unnecessary delay into a sleep, which is a
significant improvement over the old behavior of polling using udelay.

Because of the nature of PCIe transactions, the hardware won't be informed
about a new message until the write to the tail register posts. This is
only guaranteed to occur at the next register read. In ice_sq_send_cmd(),
this happens at the ice_sq_done() call. Because of this, the driver
essentially forces a minimum of one full wait time regardless of how fast
the response is.

For the hardware-based sideband queue, this is especially slow. It is
expected that the hardware will respond within 2 or 3 microseconds, an
order of magnitude faster than the 100-150 microsecond sleep. Allow such
fast completions to be handled quickly by introducing a small 5
microsecond delay before entering the sleeping timeout loop. Ensure the
tail write has been posted by using ice_flush(hw) first.

While at it, let's also remove the ICE_CTL_Q_SQ_CMD_USEC macro, as it
obscures the sleep time in the inner loop. It was likely introduced to
avoid "magic numbers", but in practice sleep and delay values are easier
to read and understand when using actual numbers instead of a named
constant.

This change should allow the fast hardware-based control queue messages to
complete quickly without delay, while the driver will still sleep while
waiting for the slower firmware queue responses.

Signed-off-by: Jacob Keller
Reviewed-by: Michal Schmidt
Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_controlq.c | 9 +++++++--
 drivers/net/ethernet/intel/ice/ice_controlq.h | 1 -
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index d2faf1baad2f9..385fd88831dbd 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -1056,14 +1056,19 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	if (cq->sq.next_to_use == cq->sq.count)
 		cq->sq.next_to_use = 0;
 	wr32(hw, cq->sq.tail, cq->sq.next_to_use);
+	ice_flush(hw);
+
+	/* Wait a short time before initial ice_sq_done() check, to allow
+	 * hardware time for completion.
+	 */
+	udelay(5);
 
 	timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT;
 	do {
 		if (ice_sq_done(hw, cq))
 			break;
 
-		usleep_range(ICE_CTL_Q_SQ_CMD_USEC,
-			     ICE_CTL_Q_SQ_CMD_USEC * 3 / 2);
+		usleep_range(100, 150);
 	} while (time_before(jiffies, timeout));
 
 	/* if ready, copy the desc back to temp */
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index 950b7f4a7a053..8f2fd1613a952 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -35,7 +35,6 @@ enum ice_ctl_q {
 
 /* Control Queue timeout settings - max delay 1s */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT	HZ	/* Wait max 1s */
-#define ICE_CTL_Q_SQ_CMD_USEC		100	/* Check every 100usec */
 #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10	/* Count 10 times */
 #define ICE_CTL_Q_ADMIN_INIT_MSEC	100	/* Check every 100msec */

From 469748429ac81f0a6a344637fc9d3b1d16a9f3d8 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Thu, 15 Jun 2023 13:33:26 +0200
Subject: [PATCH 2/6] ice: allow hot-swapping XDP programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently the ice driver's .ndo_bpf callback brings the interface down and
up regardless of the presence of XDP resources. This is only needed when
such resources have to be either configured or removed. It means that if
one is switching XDP programs on the fly with running traffic, packets
will be dropped.

To avoid this, compare early in ice_xdp_setup_prog() the incoming bpf_prog
pointer against the bpf_prog pointer that is already assigned to the VSI.
Do the swap when both the VSI's current bpf_prog and the incoming one are
non-NULL.

Lastly, while at it, put the old bpf_prog *after* the update of the Rx
rings' bpf_prog pointers. In theory, the previous code could expose us to
a state where an Rx ring's bpf_prog would still refer to an old_prog that
had already been released by the earlier bpf_prog_put().
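
As an illustrative aside (not part of this patch): a minimal user-space
sequence that exercises the hot-swap path could look roughly like the
sketch below, using libbpf (v1.0 or later assumed). The interface name
"eth0", object file "xdp_prog.bpf.o", and program name "xdp_pass" are
placeholders, not anything taken from this series.

  #include <stdio.h>
  #include <net/if.h>
  #include <linux/if_link.h>
  #include <bpf/libbpf.h>

  int main(void)
  {
  	struct bpf_object *obj;
  	struct bpf_program *prog;
  	int ifindex, prog_fd, err;

  	ifindex = if_nametoindex("eth0");	/* placeholder interface */
  	if (!ifindex)
  		return 1;

  	/* load the replacement XDP program from a placeholder object file */
  	obj = bpf_object__open_file("xdp_prog.bpf.o", NULL);
  	if (!obj || bpf_object__load(obj))
  		return 1;

  	prog = bpf_object__find_program_by_name(obj, "xdp_pass");
  	if (!prog)
  		return 1;
  	prog_fd = bpf_program__fd(prog);

  	/* attach in native (driver) mode; when a program is already
  	 * attached, the driver path touched by this patch swaps the prog
  	 * pointers instead of doing a full down/up cycle
  	 */
  	err = bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_DRV_MODE, NULL);
  	if (err)
  		fprintf(stderr, "bpf_xdp_attach: %d\n", err);

  	return err ? 1 : 0;
  }

Attaching a second program the same way would then replace the first one
in place, without the link going down.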
Signed-off-by: Maciej Fijalkowski
Acked-by: Toke Høiland-Jørgensen
Reviewed-by: Alexander Lobakin
Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel)
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 65bf399a0efcd..5dd88611141e3 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2633,11 +2633,11 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog)
 	int i;
 
 	old_prog = xchg(&vsi->xdp_prog, prog);
-	if (old_prog)
-		bpf_prog_put(old_prog);
-
 	ice_for_each_rxq(vsi, i)
 		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
 }
 
 /**
@@ -2922,6 +2922,12 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 		}
 	}
 
+	/* hot swap progs and avoid toggling link */
+	if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
+		ice_vsi_assign_bpf_prog(vsi, prog);
+		return 0;
+	}
+
 	/* need to stop netdev while setting up the program for Rx rings */
 	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
 		ret = ice_down(vsi);
@@ -2954,13 +2960,6 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 		xdp_ring_err = ice_realloc_zc_buf(vsi, false);
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed");
-	} else {
-		/* safe to call even when prog == vsi->xdp_prog as
-		 * dev_xdp_install in net/core/dev.c incremented prog's
-		 * refcount so corresponding bpf_prog_put won't cause
-		 * underflow
-		 */
-		ice_vsi_assign_bpf_prog(vsi, prog);
 	}
 
 	if (if_running)

From f98277479ad85ff1398e11c1e944ba97c3917393 Mon Sep 17 00:00:00 2001
From: Przemek Kitszel
Date: Wed, 31 May 2023 14:36:42 +0200
Subject: [PATCH 3/6] ice: clean up freeing SR-IOV VFs

The check for existing VFs has been redundant since the very inception of
the SR-IOV sysfs interface in the kernel, see
commit 1789382a72a5 ("PCI: SRIOV control and status via sysfs").

Reviewed-by: Michal Swiatkowski
Reviewed-by: Simon Horman
Signed-off-by: Przemek Kitszel
Tested-by: Rafal Romanowski
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_sriov.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 2ea6d24977a68..1f66914c7a202 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -905,14 +905,13 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
  */
 static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 {
-	int pre_existing_vfs = pci_num_vf(pf->pdev);
 	struct device *dev = ice_pf_to_dev(pf);
 	int err;
 
-	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+	if (!num_vfs) {
 		ice_free_vfs(pf);
-	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
 		return 0;
+	}
 
 	if (num_vfs > pf->vfs.num_supported) {
 		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",

From ad667d626825383b626ad6ed38d6205618abb115 Mon Sep 17 00:00:00 2001
From: Przemek Kitszel
Date: Wed, 31 May 2023 14:38:40 +0200
Subject: [PATCH 4/6] ice: remove null checks before devm_kfree() calls

We all know they are redundant: devm_kfree(), like kfree(), is a no-op
when passed a NULL pointer.
Reviewed-by: Michal Swiatkowski
Reviewed-by: Michal Wilczynski
Reviewed-by: Simon Horman
Signed-off-by: Przemek Kitszel
Tested-by: Arpana Arland (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_common.c   |  6 +--
 drivers/net/ethernet/intel/ice/ice_controlq.c |  3 +-
 drivers/net/ethernet/intel/ice/ice_flow.c     | 23 ++--------
 drivers/net/ethernet/intel/ice/ice_lib.c      | 42 +++++++------------
 drivers/net/ethernet/intel/ice/ice_sched.c    | 11 ++---
 drivers/net/ethernet/intel/ice/ice_switch.c   | 19 +++------
 6 files changed, 29 insertions(+), 75 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index eb2dc09837769..6acb40f3c2020 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -814,8 +814,7 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
 				devm_kfree(ice_hw_to_dev(hw), lst_itr);
 			}
 		}
-		if (recps[i].root_buf)
-			devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf);
+		devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf);
 	}
 	ice_rm_all_sw_replay_rule_info(hw);
 	devm_kfree(ice_hw_to_dev(hw), sw->recp_list);
@@ -1011,8 +1010,7 @@ static int ice_cfg_fw_log(struct ice_hw *hw, bool enable)
 	}
 
 out:
-	if (data)
-		devm_kfree(ice_hw_to_dev(hw), data);
+	devm_kfree(ice_hw_to_dev(hw), data);
 
 	return status;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 385fd88831dbd..e7d2474c431c5 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -339,8 +339,7 @@ do {								\
 	}							\
 	}							\
 	/* free the buffer info list */				\
-	if ((qi)->ring.cmd_buf)					\
-		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
 	/* free DMA head */					\
 	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);	\
 } while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index ef103e47a8dc2..85cca572c22a5 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -1303,23 +1303,6 @@ ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
 	return NULL;
 }
 
-/**
- * ice_dealloc_flow_entry - Deallocate flow entry memory
- * @hw: pointer to the HW struct
- * @entry: flow entry to be removed
- */
-static void
-ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry)
-{
-	if (!entry)
-		return;
-
-	if (entry->entry)
-		devm_kfree(ice_hw_to_dev(hw), entry->entry);
-
-	devm_kfree(ice_hw_to_dev(hw), entry);
-}
-
 /**
  * ice_flow_rem_entry_sync - Remove a flow entry
  * @hw: pointer to the HW struct
@@ -1335,7 +1318,8 @@ ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk,
 
 	list_del(&entry->l_entry);
 
-	ice_dealloc_flow_entry(hw, entry);
+	devm_kfree(ice_hw_to_dev(hw), entry->entry);
+	devm_kfree(ice_hw_to_dev(hw), entry);
 
 	return 0;
 }
@@ -1662,8 +1646,7 @@ ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
 
 out:
 	if (status && e) {
-		if (e->entry)
-			devm_kfree(ice_hw_to_dev(hw), e->entry);
+		devm_kfree(ice_hw_to_dev(hw), e->entry);
 		devm_kfree(ice_hw_to_dev(hw), e);
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 5ddb95d1073a1..00e3afd507a40 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -321,31 +321,19 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 	dev = ice_pf_to_dev(pf);
 
-	if (vsi->af_xdp_zc_qps) {
-		bitmap_free(vsi->af_xdp_zc_qps);
-		vsi->af_xdp_zc_qps = NULL;
-	}
+	bitmap_free(vsi->af_xdp_zc_qps);
+	vsi->af_xdp_zc_qps = NULL;
 
 	/* free the ring and vector containers */
-	if (vsi->q_vectors) {
-		devm_kfree(dev, vsi->q_vectors);
-		vsi->q_vectors = NULL;
-	}
-	if (vsi->tx_rings) {
-		devm_kfree(dev, vsi->tx_rings);
-		vsi->tx_rings = NULL;
-	}
-	if (vsi->rx_rings) {
-		devm_kfree(dev, vsi->rx_rings);
-		vsi->rx_rings = NULL;
-	}
-	if (vsi->txq_map) {
-		devm_kfree(dev, vsi->txq_map);
-		vsi->txq_map = NULL;
-	}
-	if (vsi->rxq_map) {
-		devm_kfree(dev, vsi->rxq_map);
-		vsi->rxq_map = NULL;
-	}
+	devm_kfree(dev, vsi->q_vectors);
+	vsi->q_vectors = NULL;
+	devm_kfree(dev, vsi->tx_rings);
+	vsi->tx_rings = NULL;
+	devm_kfree(dev, vsi->rx_rings);
+	vsi->rx_rings = NULL;
+	devm_kfree(dev, vsi->txq_map);
+	vsi->txq_map = NULL;
+	devm_kfree(dev, vsi->rxq_map);
+	vsi->rxq_map = NULL;
 }
 
 /**
@@ -902,10 +890,8 @@ static void ice_rss_clean(struct ice_vsi *vsi)
 
 	dev = ice_pf_to_dev(pf);
 
-	if (vsi->rss_hkey_user)
-		devm_kfree(dev, vsi->rss_hkey_user);
-	if (vsi->rss_lut_user)
-		devm_kfree(dev, vsi->rss_lut_user);
+	devm_kfree(dev, vsi->rss_hkey_user);
+	devm_kfree(dev, vsi->rss_lut_user);
 	ice_vsi_clean_rss_flow_fld(vsi);
 
 	/* remove RSS replay list */
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index b7682de0ae057..b664d60fd037e 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -358,10 +358,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node)
 			node->sibling;
 	}
 
-	/* leaf nodes have no children */
-	if (node->children)
-		devm_kfree(ice_hw_to_dev(hw), node->children);
-
+	devm_kfree(ice_hw_to_dev(hw), node->children);
 	kfree(node->name);
 	xa_erase(&pi->sched_node_ids, node->id);
 	devm_kfree(ice_hw_to_dev(hw), node);
@@ -859,10 +856,8 @@ void ice_sched_cleanup_all(struct ice_hw *hw)
 	if (!hw)
 		return;
 
-	if (hw->layer_info) {
-		devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
-		hw->layer_info = NULL;
-	}
+	devm_kfree(ice_hw_to_dev(hw), hw->layer_info);
+	hw->layer_info = NULL;
 
 	ice_sched_clear_port(hw->port_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 2ea9e1ae55174..6db4ca7978cbb 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1636,21 +1636,16 @@ ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi)
  */
 static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle)
 {
-	struct ice_vsi_ctx *vsi;
+	struct ice_vsi_ctx *vsi = ice_get_vsi_ctx(hw, vsi_handle);
 	u8 i;
 
-	vsi = ice_get_vsi_ctx(hw, vsi_handle);
 	if (!vsi)
 		return;
 
 	ice_for_each_traffic_class(i) {
-		if (vsi->lan_q_ctx[i]) {
-			devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
-			vsi->lan_q_ctx[i] = NULL;
-		}
-		if (vsi->rdma_q_ctx[i]) {
-			devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]);
-			vsi->rdma_q_ctx[i] = NULL;
-		}
+		devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]);
+		vsi->lan_q_ctx[i] = NULL;
+		devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]);
+		vsi->rdma_q_ctx[i] = NULL;
 	}
 }
@@ -5468,9 +5463,7 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups,
 		devm_kfree(ice_hw_to_dev(hw), fvit);
 	}
 
-	if (rm->root_buf)
-		devm_kfree(ice_hw_to_dev(hw), rm->root_buf);
-
+	devm_kfree(ice_hw_to_dev(hw), rm->root_buf);
 	kfree(rm);
 
 err_free_lkup_exts:

From 1dacc49782e67d4316b46329e416c24473c0369c Mon Sep 17 00:00:00 2001
From: Christophe JAILLET
Date: Sun, 11 Jun 2023 22:44:13 +0200
Subject: [PATCH 5/6] ice: Remove managed memory usage in ice_get_fw_log_cfg()

There is no need to use managed memory allocation here. The memory is
released at the end of the function.

Use kzalloc()/kfree() to simplify the code.

Signed-off-by: Christophe JAILLET
Reviewed-by: Pavan Chebbi
Reviewed-by: Jacob Keller
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 6acb40f3c2020..e16d4c83ed5f9 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -833,7 +833,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw)
 	u16 size;
 
 	size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX;
-	config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+	config = kzalloc(size, GFP_KERNEL);
 	if (!config)
 		return -ENOMEM;
 
@@ -856,7 +856,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw)
 		}
 	}
 
-	devm_kfree(ice_hw_to_dev(hw), config);
+	kfree(config);
 
 	return status;
 }

From b7a0345723385c3cc0438cf4266ccc110dc7b583 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski
Date: Tue, 13 Jun 2023 13:35:52 +0200
Subject: [PATCH 6/6] ice: use ice_down_up() where applicable

ice_change_mtu() currently uses separate ice_down() and ice_up() calls to
apply the changed MTU. ice_down_up() serves this purpose, so do the
refactoring here.

Signed-off-by: Maciej Fijalkowski
Reviewed-by: Przemek Kitszel
Reviewed-by: Simon Horman
Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen
---
 drivers/net/ethernet/intel/ice/ice_main.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5dd88611141e3..93979ab18bc1d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -7412,21 +7412,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	}
 
 	netdev->mtu = (unsigned int)new_mtu;
-
-	/* if VSI is up, bring it down and then back up */
-	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
-		err = ice_down(vsi);
-		if (err) {
-			netdev_err(netdev, "change MTU if_down err %d\n", err);
-			return err;
-		}
-
-		err = ice_up(vsi);
-		if (err) {
-			netdev_err(netdev, "change MTU if_up err %d\n", err);
-			return err;
-		}
-	}
+	err = ice_down_up(vsi);
+	if (err)
+		return err;
 
 	netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
 	set_bit(ICE_FLAG_MTU_CHANGED, pf->flags);