diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 46e9f4e0a02cd..ebe795a7f5f99 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -824,6 +824,7 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ cpumask_t affinity_mask; @@ -832,8 +833,6 @@ struct i40e_q_vector { struct rcu_head rcu; /* to avoid race with update stats on free */ char name[I40E_INT_NAME_STR_LEN]; bool arm_wb_state; -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 should adjust ITR */ } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 4c3b4243cf652..e9fc51bd6c951 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -315,9 +315,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) i, rx_ring->vsi, rx_ring->q_vector); dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_itr_setting = %d (%s)\n", - i, rx_ring->rx_itr_setting, - ITR_IS_DYNAMIC(rx_ring->rx_itr_setting) ? "dynamic" : "fixed"); + " rx_rings[%i]: itr_setting = %d (%s)\n", + i, rx_ring->itr_setting, + ITR_IS_DYNAMIC(rx_ring->itr_setting) ? "dynamic" : "fixed"); } for (i = 0; i < vsi->num_queue_pairs; i++) { struct i40e_ring *tx_ring = READ_ONCE(vsi->tx_rings[i]); @@ -366,9 +366,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " tx_rings[%i]: DCB tc = %d\n", i, tx_ring->dcb_tc); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_itr_setting = %d (%s)\n", - i, tx_ring->tx_itr_setting, - ITR_IS_DYNAMIC(tx_ring->tx_itr_setting) ? "dynamic" : "fixed"); + " tx_rings[%i]: itr_setting = %d (%s)\n", + i, tx_ring->itr_setting, + ITR_IS_DYNAMIC(tx_ring->itr_setting) ? "dynamic" : "fixed"); } rcu_read_unlock(); dev_info(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 2f5bee713fefb..29a7412b2fa60 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -2244,14 +2244,14 @@ static int __i40e_get_coalesce(struct net_device *netdev, rx_ring = vsi->rx_rings[queue]; tx_ring = vsi->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2311,34 +2311,35 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_q_vector *q_vector; - u16 vector, intrl; + u16 intrl; intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit); - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); if (ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC; + rx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; if (ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; else - tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC; + tx_ring->itr_setting &= ~I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - wr32(hw, I40E_PFINT_RATEN(vector - 1), intrl); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ + + wr32(hw, I40E_PFINT_RATEN(q_vector->reg_idx), intrl); i40e_flush(hw); } @@ -2364,11 +2365,11 @@ static int __i40e_set_coalesce(struct net_device *netdev, vsi->work_limit = ec->tx_max_coalesced_frames_irq; if (queue < 0) { - cur_rx_itr = vsi->rx_rings[0]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[0]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[0]->itr_setting; + cur_tx_itr = vsi->tx_rings[0]->itr_setting; } else if (queue < vsi->num_queue_pairs) { - cur_rx_itr = vsi->rx_rings[queue]->rx_itr_setting; - cur_tx_itr = vsi->tx_rings[queue]->tx_itr_setting; + cur_rx_itr = vsi->rx_rings[queue]->itr_setting; + cur_tx_itr = vsi->tx_rings[queue]->itr_setting; } else { netif_info(pf, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", vsi->num_queue_pairs - 1); @@ -2396,7 +2397,7 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->rx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -2407,16 +2408,16 @@ static int __i40e_set_coalesce(struct net_device *netdev, return -EINVAL; } - if (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)) { + if (ec->tx_coalesce_usecs > I40E_MAX_ITR) { netif_info(pf, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } if (ec->use_adaptive_rx_coalesce && !cur_rx_itr) - ec->rx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->rx_coalesce_usecs = I40E_MIN_ITR; if (ec->use_adaptive_tx_coalesce && !cur_tx_itr) - ec->tx_coalesce_usecs = I40E_MIN_ITR << 1; + ec->tx_coalesce_usecs = I40E_MIN_ITR; intrl_reg = i40e_intrl_usec_to_reg(ec->rx_coalesce_usecs_high); vsi->int_rate_limit = INTRL_REG_TO_USEC(intrl_reg); @@ -4406,6 +4407,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } flags_complete: + changed_flags = orig_flags ^ new_flags; + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. */ @@ -4415,13 +4418,17 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) !(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE)) return -EOPNOTSUPP; - /* Disable FW LLDP not supported if NPAR active or if FW - * API version < 1.7 + /* If the driver detected FW LLDP was disabled on init, this flag could + * be set, however we do not support _changing_ the flag if NPAR is + * enabled or FW API version < 1.7. There are situations where older + * FW versions/NPAR enabled PFs could disable LLDP, however we _must_ + * not allow the user to enable/disable LLDP with this flag on + * unsupported FW versions. */ - if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (pf->hw.func_caps.npar_enable) { dev_warn(&pf->pdev->dev, - "Unable to stop FW LLDP if NPAR active\n"); + "Unable to change FW LLDP if NPAR active\n"); return -EOPNOTSUPP; } @@ -4429,7 +4436,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) (pf->hw.aq.api_maj_ver == 1 && pf->hw.aq.api_min_ver < 7)) { dev_warn(&pf->pdev->dev, - "FW ver does not support stopping FW LLDP\n"); + "FW ver does not support changing FW LLDP\n"); return -EOPNOTSUPP; } } @@ -4439,6 +4446,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) * something else has modified the flags variable since we copied it * originally. We'll just punt with an error and log something in the * message buffer. + * + * This is the point of no return for this function. We need to have + * checked any discrepancies or misconfigurations and returned + * EOPNOTSUPP before updating pf->flags here. */ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) { dev_warn(&pf->pdev->dev, @@ -4446,8 +4457,6 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) return -EAGAIN; } - changed_flags = orig_flags ^ new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. @@ -4479,6 +4488,12 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } } + if ((changed_flags & pf->flags & + I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && + (pf->flags & I40E_FLAG_MFP_ENABLED)) + dev_warn(&pf->pdev->dev, + "Turning on link-down-on-close flag may affect other partitions\n"); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e31adbc75f9cc..70ecd9c3a1632 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3449,15 +3449,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i]; - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[i]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = + ITR_TO_REG(vsi->rx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), - q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[i]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = + ITR_TO_REG(vsi->tx_rings[i]->itr_setting); wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), - q_vector->tx.itr); + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + wr32(hw, I40E_PFINT_RATEN(vector - 1), i40e_intrl_usec_to_reg(vsi->int_rate_limit)); @@ -3558,13 +3563,14 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) u32 val; /* set the ITR configuration */ - q_vector->itr_countdown = ITR_COUNTDOWN_START; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[0]->rx_itr_setting); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[0]->tx_itr_setting); - q_vector->tx.latency_range = I40E_LOW_LATENCY; - wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting); + wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; i40e_enable_misc_int_causes(pf); @@ -9215,6 +9221,17 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); + if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && + ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || + hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { + /* The following delay is necessary for 4.33 firmware and older + * to recover after EMP reset. 200 ms should suffice but we + * put here 300 ms to be sure that FW is ready to operate + * after reset. + */ + mdelay(300); + } + /* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) i40e_verify_eeprom(pf); @@ -10018,7 +10035,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->tx_rings[i] = ring++; if (!i40e_enabled_xdp_vsi(vsi)) @@ -10036,7 +10053,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) ring->flags = I40E_TXR_FLAGS_WB_ON_ITR; set_ring_xdp(ring); - ring->tx_itr_setting = pf->tx_itr_default; + ring->itr_setting = pf->tx_itr_default; vsi->xdp_rings[i] = ring++; setup_rx: @@ -10049,7 +10066,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->count = vsi->num_desc; ring->size = 0; ring->dcb_tc = 0; - ring->rx_itr_setting = pf->rx_itr_default; + ring->itr_setting = pf->rx_itr_default; vsi->rx_rings[i] = ring; } @@ -10328,9 +10345,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll, NAPI_POLL_WEIGHT); - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e554aa6cf0702..1ec9b1d8023da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -995,99 +995,241 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) } } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->vsi->back->hw.phy.link_info.link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } + + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; + } + + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1991,7 +2133,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -2274,29 +2416,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_PFINT_DYN_CTLN_INTENA_MASK | - I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) | - (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT); + (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->rx_rings[idx]->rx_itr_setting; -} -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - return vsi->tx_rings[idx]->tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -2308,10 +2466,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; + u32 intval; /* If we don't have MSIX, then we only need to re-enable icr0 */ if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) { @@ -2319,65 +2474,49 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, return; } - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); - - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } - - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 701b708628b0d..f75a8fe68fcf0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -30,32 +30,37 @@ #include /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) + /** * i40e_intrl_usec_to_reg - convert interrupt rate limit to register * @intrl: interrupt rate limit to convert @@ -382,8 +387,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -459,21 +463,21 @@ static inline void set_ring_xdp(struct i40e_ring *ring) ring->flags |= I40E_TXR_FLAGS_XDP; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 357d6051281f3..eb8f3e327f6ba 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -392,99 +392,241 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) val); } +static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) +{ + return &q_vector->rx == rc; +} + +static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector) +{ + unsigned int divisor; + + switch (q_vector->adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024; + break; + case I40E_LINK_SPEED_25GB: + case I40E_LINK_SPEED_20GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512; + break; + default: + case I40E_LINK_SPEED_10GB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256; + break; + case I40E_LINK_SPEED_1GB: + case I40E_LINK_SPEED_100MB: + divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32; + break; + } + + return divisor; +} + /** - * i40e_set_new_dynamic_itr - Find new ITR level + * i40e_update_itr - update the dynamic ITR value based on statistics + * @q_vector: structure containing interrupt and ring information * @rc: structure containing ring performance data * - * Returns true if ITR changed, false if not - * - * Stores a new ITR value based on packets and byte counts during - * the last interrupt. The advantage of per interrupt computation - * is faster updates and more accurate ITR for the current traffic - * pattern. Constants in this function were computed based on - * theoretical maximum wire speed and thresholds were set based on - * testing data as well as attempting to minimize response time + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time * while increasing bulk throughput. **/ -static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc) +static void i40e_update_itr(struct i40e_q_vector *q_vector, + struct i40e_ring_container *rc) { - enum i40e_latency_range new_latency_range = rc->latency_range; - u32 new_itr = rc->itr; - int bytes_per_usec; - unsigned int usecs, estimated_usecs; + unsigned int avg_wire_size, packets, bytes, itr; + unsigned long next_update = jiffies; - if (rc->total_packets == 0 || !rc->itr) - return false; + /* If we don't have any rings just leave ourselves set for maximum + * possible latency so we take ourselves out of the equation. + */ + if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting)) + return; + + /* For Rx we want to push the delay up and default to low latency. + * for Tx we want to pull the delay down and default to high latency. + */ + itr = i40e_container_is_rx(q_vector, rc) ? + I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY : + I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY; + + /* If we didn't update within up to 1 - 2 jiffies we can assume + * that either packets are coming in so slow there hasn't been + * any work, or that there is so much work that NAPI is dealing + * with interrupt moderation and we don't need to do anything. + */ + if (time_after(next_update, rc->next_update)) + goto clear_counts; + + /* If itr_countdown is set it means we programmed an ITR within + * the last 4 interrupt cycles. This has a side effect of us + * potentially firing an early interrupt. In order to work around + * this we need to throw out any data received for a few + * interrupts following the update. + */ + if (q_vector->itr_countdown) { + itr = rc->target_itr; + goto clear_counts; + } + + packets = rc->total_packets; + bytes = rc->total_bytes; - usecs = (rc->itr << 1) * ITR_COUNTDOWN_START; - bytes_per_usec = rc->total_bytes / usecs; + if (i40e_container_is_rx(q_vector, rc)) { + /* If Rx there are 1 to 4 packets and bytes are less than + * 9000 assume insufficient data to use bulk rate limiting + * approach unless Tx is already in bulk rate limiting. We + * are likely latency driven. + */ + if (packets && packets < 4 && bytes < 9000 && + (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) { + itr = I40E_ITR_ADAPTIVE_LATENCY; + goto adjust_by_size; + } + } else if (packets < 4) { + /* If we have Tx and Rx ITR maxed and Tx ITR is running in + * bulk mode and we are receiving 4 or fewer packets just + * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so + * that the Rx can relax. + */ + if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS && + (q_vector->rx.target_itr & I40E_ITR_MASK) == + I40E_ITR_ADAPTIVE_MAX_USECS) + goto clear_counts; + } else if (packets > 32) { + /* If we have processed over 32 packets in a single interrupt + * for Tx assume we need to switch over to "bulk" mode. + */ + rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY; + } - /* The calculations in this algorithm depend on interrupts actually - * firing at the ITR rate. This may not happen if the packet rate is - * really low, or if we've been napi polling. Check to make sure - * that's not the case before we continue. + /* We have no packets to actually measure against. This means + * either one of the other queues on this vector is active or + * we are a Tx queue doing TSO with too high of an interrupt rate. + * + * Between 4 and 56 we can assume that our current interrupt delay + * is only slightly too low. As such we should increase it by a small + * fixed amount. */ - estimated_usecs = jiffies_to_usecs(jiffies - rc->last_itr_update); - if (estimated_usecs > usecs) { - new_latency_range = I40E_LOW_LATENCY; - goto reset_latency; + if (packets < 56) { + itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC; + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; + } + goto clear_counts; + } + + if (packets <= 256) { + itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr); + itr &= I40E_ITR_MASK; + + /* Between 56 and 112 is our "goldilocks" zone where we are + * working out "just right". Just report that our current + * ITR is good for us. + */ + if (packets <= 112) + goto clear_counts; + + /* If packet count is 128 or greater we are likely looking + * at a slight overrun of the delay we want. Try halving + * our delay to see if that will cut the number of packets + * in half per interrupt. + */ + itr /= 2; + itr &= I40E_ITR_MASK; + if (itr < I40E_ITR_ADAPTIVE_MIN_USECS) + itr = I40E_ITR_ADAPTIVE_MIN_USECS; + + goto clear_counts; } - /* simple throttlerate management - * 0-10MB/s lowest (50000 ints/s) - * 10-20MB/s low (20000 ints/s) - * 20-1249MB/s bulk (18000 ints/s) + /* The paths below assume we are dealing with a bulk ITR since + * number of packets is greater than 256. We are just going to have + * to compute a value and try to bring the count under control, + * though for smaller packet sizes there isn't much we can do as + * NAPI polling will likely be kicking in sooner rather than later. + */ + itr = I40E_ITR_ADAPTIVE_BULK; + +adjust_by_size: + /* If packet counts are 256 or greater we can assume we have a gross + * overestimation of what the rate should be. Instead of trying to fine + * tune it just use the formula below to try and dial in an exact value + * give the current packet size of the frame. + */ + avg_wire_size = bytes / packets; + + /* The following is a crude approximation of: + * wmem_default / (size + overhead) = desired_pkts_per_int + * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate + * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * - * The math works out because the divisor is in 10^(-6) which - * turns the bytes/us input value into MB/s values, but - * make sure to use usecs, as the register values written - * are in 2 usec increments in the ITR registers, and make sure - * to use the smoothed values that the countdown timer gives us. + * Assuming wmem_default is 212992 and overhead is 640 bytes per + * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the + * formula down to + * + * (170 * (size + 24)) / (size + 640) = ITR + * + * We first do some math on the packet size and then finally bitshift + * by 8 after rounding up. We also have to account for PCIe link speed + * difference as ITR scales based on this. */ - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - if (bytes_per_usec > 10) - new_latency_range = I40E_LOW_LATENCY; - break; - case I40E_LOW_LATENCY: - if (bytes_per_usec > 20) - new_latency_range = I40E_BULK_LATENCY; - else if (bytes_per_usec <= 10) - new_latency_range = I40E_LOWEST_LATENCY; - break; - case I40E_BULK_LATENCY: - default: - if (bytes_per_usec <= 20) - new_latency_range = I40E_LOW_LATENCY; - break; + if (avg_wire_size <= 60) { + /* Start at 250k ints/sec */ + avg_wire_size = 4096; + } else if (avg_wire_size <= 380) { + /* 250K ints/sec to 60K ints/sec */ + avg_wire_size *= 40; + avg_wire_size += 1696; + } else if (avg_wire_size <= 1084) { + /* 60K ints/sec to 36K ints/sec */ + avg_wire_size *= 15; + avg_wire_size += 11452; + } else if (avg_wire_size <= 1980) { + /* 36K ints/sec to 30K ints/sec */ + avg_wire_size *= 5; + avg_wire_size += 22420; + } else { + /* plateau at a limit of 30K ints/sec */ + avg_wire_size = 32256; } -reset_latency: - rc->latency_range = new_latency_range; + /* If we are in low latency mode halve our delay which doubles the + * rate to somewhere between 100K to 16K ints/sec + */ + if (itr & I40E_ITR_ADAPTIVE_LATENCY) + avg_wire_size /= 2; - switch (new_latency_range) { - case I40E_LOWEST_LATENCY: - new_itr = I40E_ITR_50K; - break; - case I40E_LOW_LATENCY: - new_itr = I40E_ITR_20K; - break; - case I40E_BULK_LATENCY: - new_itr = I40E_ITR_18K; - break; - default: - break; + /* Resultant value is 256 times larger than it needs to be. This + * gives us room to adjust the value as needed to either increase + * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. + * + * Use addition as we have already recorded the new latency flag + * for the ITR value. + */ + itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) * + I40E_ITR_ADAPTIVE_MIN_INC; + + if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) { + itr &= I40E_ITR_ADAPTIVE_LATENCY; + itr += I40E_ITR_ADAPTIVE_MAX_USECS; } +clear_counts: + /* write back value */ + rc->target_itr = itr; + + /* next update should occur within next jiffy */ + rc->next_update = next_update + 1; + rc->total_bytes = 0; rc->total_packets = 0; - rc->last_itr_update = jiffies; - - if (new_itr != rc->itr) { - rc->itr = new_itr; - return true; - } - return false; } /** @@ -1273,7 +1415,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * @rx_buffer: rx buffer to pull data from * * This function will clean up the contents of the rx_buffer. It will - * either recycle the bufer or unmap it and free the associated resources. + * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer) @@ -1457,33 +1599,45 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) return failure ? budget : (int)total_rx_packets; } -static u32 i40e_buildreg_itr(const int type, const u16 itr) +static inline u32 i40e_buildreg_itr(const int type, u16 itr) { u32 val; + /* We don't bother with setting the CLEARPBA bit as the data sheet + * points out doing so is "meaningless since it was already + * auto-cleared". The auto-clearing happens when the interrupt is + * asserted. + * + * Hardware errata 28 for also indicates that writing to a + * xxINT_DYN_CTLx CSR with INTENA_MSK (bit 31) set to 0 will clear + * an event in the PBA anyway so we need to rely on the automask + * to hold pending events for us until the interrupt is re-enabled + * + * The itr value is reported in microseconds, and the register + * value is recorded in 2 microsecond units. For this reason we + * only need to shift by the interval shift - 1 instead of the + * full value. + */ + itr &= I40E_ITR_MASK; + val = I40E_VFINT_DYN_CTLN1_INTENA_MASK | - I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK | (type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) | - (itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT); + (itr << (I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT - 1)); return val; } /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 -static inline int get_rx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - return adapter->rx_rings[idx].rx_itr_setting; -} - -static inline int get_tx_itr(struct i40e_vsi *vsi, int idx) -{ - struct i40evf_adapter *adapter = vsi->back; - - return adapter->tx_rings[idx].tx_itr_setting; -} +/* The act of updating the ITR will cause it to immediately trigger. In order + * to prevent this from throwing off adaptive update statistics we defer the + * update so that it can only happen so often. So after either Tx or Rx are + * updated we make the adaptive scheme wait until either the ITR completely + * expires via the next_update expiration or we have been through at least + * 3 interrupts. + */ +#define ITR_COUNTDOWN_START 3 /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1495,70 +1649,51 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector) { struct i40e_hw *hw = &vsi->back->hw; - bool rx = false, tx = false; - u32 rxval, txval; - int idx = q_vector->v_idx; - int rx_itr_setting, tx_itr_setting; - - /* avoid dynamic calculation if in countdown mode OR if - * all dynamic is disabled - */ - rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); - - rx_itr_setting = get_rx_itr(vsi, idx); - tx_itr_setting = get_tx_itr(vsi, idx); + u32 intval; - if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(rx_itr_setting) && - !ITR_IS_DYNAMIC(tx_itr_setting))) { - goto enable_int; - } - - if (ITR_IS_DYNAMIC(rx_itr_setting)) { - rx = i40e_set_new_dynamic_itr(&q_vector->rx); - rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); - } + /* These will do nothing if dynamic updates are not enabled */ + i40e_update_itr(q_vector, &q_vector->tx); + i40e_update_itr(q_vector, &q_vector->rx); - if (ITR_IS_DYNAMIC(tx_itr_setting)) { - tx = i40e_set_new_dynamic_itr(&q_vector->tx); - txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); - } - - if (rx || tx) { - /* get the higher of the two ITR adjustments and - * use the same value for both ITR registers - * when in adaptive mode (Rx and/or Tx) - */ - u16 itr = max(q_vector->tx.itr, q_vector->rx.itr); - - q_vector->tx.itr = q_vector->rx.itr = itr; - txval = i40e_buildreg_itr(I40E_TX_ITR, itr); - tx = true; - rxval = i40e_buildreg_itr(I40E_RX_ITR, itr); - rx = true; - } - - /* only need to enable the interrupt once, but need - * to possibly update both ITR values + /* This block of logic allows us to get away with only updating + * one ITR value with each interrupt. The idea is to perform a + * pseudo-lazy update with the following criteria. + * + * 1. Rx is given higher priority than Tx if both are in same state + * 2. If we must reduce an ITR that is given highest priority. + * 3. We then give priority to increasing ITR based on amount. */ - if (rx) { - /* set the INTENA_MSK_MASK so that this first write - * won't actually enable the interrupt, instead just - * updating the ITR (it's bit 31 PF and VF) + if (q_vector->rx.target_itr < q_vector->rx.current_itr) { + /* Rx ITR needs to be reduced, this is highest priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) || + ((q_vector->rx.target_itr - q_vector->rx.current_itr) < + (q_vector->tx.target_itr - q_vector->tx.current_itr))) { + /* Tx ITR needs to be reduced, this is second priority + * Tx ITR needs to be increased more than Rx, fourth priority */ - rxval |= BIT(31); - /* don't check _DOWN because interrupt isn't being enabled */ - wr32(hw, INTREG(q_vector->reg_idx), rxval); + intval = i40e_buildreg_itr(I40E_TX_ITR, + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) { + /* Rx ITR needs to be increased, third priority */ + intval = i40e_buildreg_itr(I40E_RX_ITR, + q_vector->rx.target_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; + q_vector->itr_countdown = ITR_COUNTDOWN_START; + } else { + /* No ITR update, lowest priority */ + intval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + if (q_vector->itr_countdown) + q_vector->itr_countdown--; } -enable_int: if (!test_bit(__I40E_VSI_DOWN, vsi->state)) - wr32(hw, INTREG(q_vector->reg_idx), txval); - - if (q_vector->itr_countdown) - q_vector->itr_countdown--; - else - q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, INTREG(q_vector->reg_idx), intval); } /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 7798a6645c3f1..9129447d079b1 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -28,31 +28,35 @@ #define _I40E_TXRX_H_ /* Interrupt Throttling and Rate Limiting Goodies */ - -#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ -#define I40E_MIN_ITR 0x0001 /* reg uses 2 usec resolution */ -#define I40E_ITR_100K 0x0005 -#define I40E_ITR_50K 0x000A -#define I40E_ITR_20K 0x0019 -#define I40E_ITR_18K 0x001B -#define I40E_ITR_8K 0x003E -#define I40E_ITR_4K 0x007A -#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ -#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \ - I40E_ITR_DYNAMIC) -#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ -#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */ -#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */ #define I40E_DEFAULT_IRQ_WORK 256 -#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1) -#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC)) -#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1) + +/* The datasheet for the X710 and XL710 indicate that the maximum value for + * the ITR is 8160usec which is then called out as 0xFF0 with a 2usec + * resolution. 8160 is 0x1FE0 when written out in hex. So instead of storing + * the register value which is divided by 2 lets use the actual values and + * avoid an excessive amount of translation. + */ +#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */ +#define I40E_ITR_MASK 0x1FFE /* mask for ITR register value */ +#define I40E_MIN_ITR 2 /* reg uses 2 usec resolution */ +#define I40E_ITR_100K 10 /* all values below must be even */ +#define I40E_ITR_50K 20 +#define I40E_ITR_20K 50 +#define I40E_ITR_18K 60 +#define I40E_ITR_8K 122 +#define I40E_MAX_ITR 8160 /* maximum value as per datasheet */ +#define ITR_TO_REG(setting) ((setting) & ~I40E_ITR_DYNAMIC) +#define ITR_REG_ALIGN(setting) __ALIGN_MASK(setting, ~I40E_ITR_MASK) +#define ITR_IS_DYNAMIC(setting) (!!((setting) & I40E_ITR_DYNAMIC)) + +#define I40E_ITR_RX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) +#define I40E_ITR_TX_DEF (I40E_ITR_20K | I40E_ITR_DYNAMIC) + /* 0x40 is the enable bit for interrupt rate limiting, and must be set if * the value of the rate limit is non-zero */ #define INTRL_ENA BIT(6) +#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */ #define INTRL_REG_TO_USEC(intrl) ((intrl & ~INTRL_ENA) << 2) #define INTRL_USEC_TO_REG(set) ((set) ? ((set) >> 2) | INTRL_ENA : 0) #define I40E_INTRL_8K 125 /* 8000 ints/sec */ @@ -362,8 +366,7 @@ struct i40e_ring { * these values always store the USER setting, and must be converted * before programming to a register. */ - u16 rx_itr_setting; - u16 tx_itr_setting; + u16 itr_setting; u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -425,21 +428,21 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; } -enum i40e_latency_range { - I40E_LOWEST_LATENCY = 0, - I40E_LOW_LATENCY = 1, - I40E_BULK_LATENCY = 2, -}; +#define I40E_ITR_ADAPTIVE_MIN_INC 0x0002 +#define I40E_ITR_ADAPTIVE_MIN_USECS 0x0002 +#define I40E_ITR_ADAPTIVE_MAX_USECS 0x007e +#define I40E_ITR_ADAPTIVE_LATENCY 0x8000 +#define I40E_ITR_ADAPTIVE_BULK 0x0000 +#define ITR_IS_BULK(x) (!((x) & I40E_ITR_ADAPTIVE_LATENCY)) struct i40e_ring_container { - /* array of pointers to rings */ - struct i40e_ring *ring; + struct i40e_ring *ring; /* pointer to linked list of ring(s) */ + unsigned long next_update; /* jiffies value of next update */ unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ - unsigned long last_itr_update; /* jiffies of last ITR update */ u16 count; - enum i40e_latency_range latency_range; - u16 itr; + u16 target_itr; /* target ITR setting for ring(s) */ + u16 current_itr; /* current ITR setting for ring(s) */ }; /* iterator for handling rings in ring container */ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 9690c1ea019ec..b6991e8014d89 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -117,9 +117,8 @@ struct i40e_q_vector { struct i40e_ring_container rx; struct i40e_ring_container tx; u32 ring_mask; + u8 itr_countdown; /* when 0 should adjust adaptive ITR */ u8 num_ringpairs; /* total number of ring pairs in vector */ -#define ITR_COUNTDOWN_START 100 - u8 itr_countdown; /* when 0 or 1 update ITR */ u16 v_idx; /* index in the vsi->q_vector array. */ u16 reg_idx; /* register index of the interrupt */ char name[IFNAMSIZ + 15]; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index e2d8aa19d205f..aded3ad7763e9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -457,14 +457,14 @@ static int __i40evf_get_coalesce(struct net_device *netdev, rx_ring = &adapter->rx_rings[queue]; tx_ring = &adapter->tx_rings[queue]; - if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + if (ITR_IS_DYNAMIC(rx_ring->itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->itr_setting & ~I40E_ITR_DYNAMIC; return 0; } @@ -502,7 +502,7 @@ static int i40evf_get_per_queue_coalesce(struct net_device *netdev, /** * i40evf_set_itr_per_queue - set ITR values for specific queue - * @vsi: the VSI to set values for + * @adapter: the VF adapter struct to set values for * @ec: coalesce settings from ethtool * @queue: the queue to modify * @@ -514,33 +514,29 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, { struct i40e_ring *rx_ring = &adapter->rx_rings[queue]; struct i40e_ring *tx_ring = &adapter->tx_rings[queue]; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_hw *hw = &adapter->hw; struct i40e_q_vector *q_vector; - u16 vector; - rx_ring->rx_itr_setting = ec->rx_coalesce_usecs; - tx_ring->tx_itr_setting = ec->tx_coalesce_usecs; + rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); + tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); - rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC; + rx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_rx_coalesce) - rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC; + rx_ring->itr_setting ^= I40E_ITR_DYNAMIC; - tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC; + tx_ring->itr_setting |= I40E_ITR_DYNAMIC; if (!ec->use_adaptive_tx_coalesce) - tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC; + tx_ring->itr_setting ^= I40E_ITR_DYNAMIC; q_vector = rx_ring->q_vector; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector = tx_ring->q_vector; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - vector = vsi->base_vector + q_vector->v_idx; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); - i40e_flush(hw); + /* The interrupt handler itself will take care of programming + * the Tx and Rx ITR values based on the values we have entered + * into the q_vector, no need to write the values now. + */ } /** @@ -565,8 +561,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->rx_coalesce_usecs == 0) { if (ec->use_adaptive_rx_coalesce) netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); - } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->rx_coalesce_usecs < I40E_MIN_ITR) || + (ec->rx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); return -EINVAL; } @@ -575,8 +571,8 @@ static int __i40evf_set_coalesce(struct net_device *netdev, if (ec->tx_coalesce_usecs == 0) { if (ec->use_adaptive_tx_coalesce) netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); - } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || - (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + } else if ((ec->tx_coalesce_usecs < I40E_MIN_ITR) || + (ec->tx_coalesce_usecs > I40E_MAX_ITR)) { netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 16989ad2ca90b..6fd09926181ae 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -353,11 +353,12 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) rx_ring->vsi = &adapter->vsi; q_vector->rx.ring = rx_ring; q_vector->rx.count++; - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); + q_vector->rx.next_update = jiffies + 1; + q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting); q_vector->ring_mask |= BIT(r_idx); - q_vector->itr_countdown = ITR_COUNTDOWN_START; - wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx), + q_vector->rx.current_itr); + q_vector->rx.current_itr = q_vector->rx.target_itr; } /** @@ -378,11 +379,12 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) tx_ring->vsi = &adapter->vsi; q_vector->tx.ring = tx_ring; q_vector->tx.count++; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); - q_vector->itr_countdown = ITR_COUNTDOWN_START; + q_vector->tx.next_update = jiffies + 1; + q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting); q_vector->num_ringpairs++; - wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx), + q_vector->tx.target_itr); + q_vector->tx.current_itr = q_vector->tx.target_itr; } /** @@ -1169,7 +1171,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; - tx_ring->tx_itr_setting = I40E_ITR_TX_DEF; + tx_ring->itr_setting = I40E_ITR_TX_DEF; if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1178,7 +1180,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; - rx_ring->rx_itr_setting = I40E_ITR_RX_DEF; + rx_ring->itr_setting = I40E_ITR_RX_DEF; } adapter->num_active_queues = num_active_queues; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index 50ce0d6c09ef7..d57a672855055 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -344,6 +344,7 @@ void i40evf_disable_queues(struct i40evf_adapter *adapter) void i40evf_map_queues(struct i40evf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; + struct virtchnl_vector_map *vecmap; int v_idx, q_vectors, len; struct i40e_q_vector *q_vector; @@ -367,17 +368,22 @@ void i40evf_map_queues(struct i40evf_adapter *adapter) vimi->num_vectors = adapter->num_msix_vectors; /* Queue vectors first */ for (v_idx = 0; v_idx < q_vectors; v_idx++) { - q_vector = adapter->q_vectors + v_idx; - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = v_idx + NONQ_VECS; - vimi->vecmap[v_idx].txq_map = q_vector->ring_mask; - vimi->vecmap[v_idx].rxq_map = q_vector->ring_mask; + q_vector = &adapter->q_vectors[v_idx]; + vecmap = &vimi->vecmap[v_idx]; + + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = v_idx + NONQ_VECS; + vecmap->txq_map = q_vector->ring_mask; + vecmap->rxq_map = q_vector->ring_mask; + vecmap->rxitr_idx = I40E_RX_ITR; + vecmap->txitr_idx = I40E_TX_ITR; } /* Misc vector last - this is only for AdminQ messages */ - vimi->vecmap[v_idx].vsi_id = adapter->vsi_res->vsi_id; - vimi->vecmap[v_idx].vector_id = 0; - vimi->vecmap[v_idx].txq_map = 0; - vimi->vecmap[v_idx].rxq_map = 0; + vecmap = &vimi->vecmap[v_idx]; + vecmap->vsi_id = adapter->vsi_res->vsi_id; + vecmap->vector_id = 0; + vecmap->txq_map = 0; + vecmap->rxq_map = 0; adapter->aq_required &= ~I40EVF_FLAG_AQ_MAP_VECTORS; i40evf_send_pf_msg(adapter, VIRTCHNL_OP_CONFIG_IRQ_MAP,