From 6d779b41f715ce7cd0f3ddbf809a3b8aaff72115 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Date: Sat, 28 Sep 2013 06:00:02 +0000 Subject: [PATCH 01/13] i40e: Link code updates Link events should not print to the log until the device is administratively up. Signed-off-by: Anjali Singhai Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 221aa47950176..657babe82c9c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3703,8 +3703,11 @@ static int i40e_up_complete(struct i40e_vsi *vsi) if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) && (vsi->netdev)) { + netdev_info(vsi->netdev, "NIC Link is Up\n"); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); + } else if (vsi->netdev) { + netdev_info(vsi->netdev, "NIC Link is Down\n"); } i40e_service_event_schedule(pf); @@ -4153,8 +4156,9 @@ static void i40e_link_event(struct i40e_pf *pf) if (new_link == old_link) return; - netdev_info(pf->vsi[pf->lan_vsi]->netdev, - "NIC Link is %s\n", (new_link ? "Up" : "Down")); + if (!test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state)) + netdev_info(pf->vsi[pf->lan_vsi]->netdev, + "NIC Link is %s\n", (new_link ? "Up" : "Down")); /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. From c304fdac6cc0aab1f01e0f2b32c881d908a57a84 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:12 +0000 Subject: [PATCH 02/13] i40e: Drop unused completed stat The Tx "completed" stat was part of the original rewrite for detecting Tx hangs. However some time ago in ixgbe I determined that we could just use the packets stat instead. Since then this stat was removed from ixgbe and it serves no purpose in i40e so it can be dropped. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 3 +-- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +-- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 8dbd91f64b74d..0b01c61f70c22 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -560,10 +560,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->tx_rings[i].tx_stats.bytes, vsi->tx_rings[i].tx_stats.restart_queue); dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_stats: tx_busy = %lld, completed = %lld, tx_done_old = %lld\n", + " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n", i, vsi->tx_rings[i].tx_stats.tx_busy, - vsi->tx_rings[i].tx_stats.completed, vsi->tx_rings[i].tx_stats.tx_done_old); dev_info(&pf->pdev->dev, " tx_rings[%i]: size = %i, dma = 0x%08lx\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 49d2cfa9b0cce..32c9aebcb5754 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -346,8 +346,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) break; - /* count the packet as being completed */ - tx_ring->tx_stats.completed++; + /* clear next_to_watch to prevent false hangs */ tx_buf->next_to_watch = NULL; tx_buf->time_stamp = 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index b1d7722d98a7f..2fbacaff0445e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -134,7 +134,6 @@ struct i40e_tx_queue_stats { u64 bytes; u64 restart_queue; u64 tx_busy; - u64 completed; u64 tx_done_old; }; From 35a1e2ad1716fe3d956eea6e356ca2758f6392a7 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:17 +0000 Subject: [PATCH 03/13] i40e: Cleanup Tx buffer info layout - drop the mapped_as_page u8 from the Tx buffer info as it was unused - use the DMA unmap accessors for Tx DMA - replace checks of DMA with checks of the unmap length to verify if an unmap is needed - update the Tx buffer layout to make it consistent with igb, ixgbe Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 ++++++++-------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 13 ++++++------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 32c9aebcb5754..3bc3efa6229e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -195,20 +195,20 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id) static inline void i40e_unmap_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { - if (tx_buffer->dma) { + if (dma_unmap_len(tx_buffer, len)) { if (tx_buffer->tx_flags & I40E_TX_FLAGS_MAPPED_AS_PAGE) dma_unmap_page(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); else dma_unmap_single(ring->dev, - tx_buffer->dma, - tx_buffer->length, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); } - tx_buffer->dma = 0; tx_buffer->time_stamp = 0; + dma_unmap_len_set(tx_buffer, len, 0); } /** @@ -1554,9 +1554,9 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, } tx_bi = &tx_ring->tx_bi[i]; - tx_bi->length = buf_offset + size; + dma_unmap_len_set(tx_bi, len, buf_offset + size); + dma_unmap_addr_set(tx_bi, dma, dma); tx_bi->tx_flags = tx_flags; - tx_bi->dma = dma; tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset); tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2fbacaff0445e..711f549384897 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -110,15 +110,14 @@ #define I40E_TX_FLAGS_VLAN_SHIFT 16 struct i40e_tx_buffer { - struct sk_buff *skb; - dma_addr_t dma; - unsigned long time_stamp; - u16 length; - u32 tx_flags; struct i40e_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; unsigned int bytecount; - u16 gso_segs; - u8 mapped_as_page; + unsigned short gso_segs; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; }; struct i40e_rx_buffer { From fc4ac67bc98f1d54ec0ef8bcddf54892ad78e1f9 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:22 +0000 Subject: [PATCH 04/13] i40e: Do not directly increment Tx next_to_use Avoid directly incrementing next_to_use for multiple reasons. The main reason being that if we directly increment it then it can attain a state where it is equal to the ring count. Technically this is a state it should not be able to reach but the way this is written it now can. This patch pulls the value off into a register and then increments it and writes back either the value or 0 depending on if the value is equal to the ring count. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 46 +++++++++++---------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3bc3efa6229e8..e8db4dca6e85a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -73,11 +73,12 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, goto dma_fail; /* grab the next descriptor */ - fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use); - tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use]; - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + tx_buf = &tx_ring->tx_bi[i]; + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) @@ -134,11 +135,11 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); /* Now program a dummy descriptor */ - tx_desc = I40E_TX_DESC(tx_ring, tx_ring->next_to_use); - tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use]; - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + tx_desc = I40E_TX_DESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; tx_desc->buffer_addr = cpu_to_le64(dma); td_cmd = I40E_TX_DESC_CMD_EOP | @@ -148,9 +149,6 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0); - /* Mark the data descriptor to be watched */ - tx_buf->next_to_watch = tx_desc; - /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -158,6 +156,9 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, */ wmb(); + /* Mark the data descriptor to be watched */ + tx_buf->next_to_watch = tx_desc; + writel(tx_ring->next_to_use, tx_ring->tail); return 0; @@ -1143,6 +1144,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, struct tcphdr *th; unsigned int hlen; u32 flex_ptype, dtype_cmd; + u16 i; /* make sure ATR is enabled */ if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED)) @@ -1182,10 +1184,11 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->atr_count = 0; /* grab the next descriptor */ - fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use); - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & I40E_TXD_FLTR_QW0_QINDEX_MASK; @@ -1481,15 +1484,16 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, const u32 cd_tunneling, const u32 cd_l2tag2) { struct i40e_tx_context_desc *context_desc; + int i = tx_ring->next_to_use; if (!cd_type_cmd_tso_mss && !cd_tunneling && !cd_l2tag2) return; /* grab the next descriptor */ - context_desc = I40E_TX_CTXTDESC(tx_ring, tx_ring->next_to_use); - tx_ring->next_to_use++; - if (tx_ring->next_to_use == tx_ring->count) - tx_ring->next_to_use = 0; + context_desc = I40E_TX_CTXTDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; /* cpu_to_le32 and assign to struct fields */ context_desc->tunneling_params = cpu_to_le32(cd_tunneling); From a5e9c5726424b05a3643450d45c134cea103181e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:27 +0000 Subject: [PATCH 05/13] i40e: clean up Tx fast path Sync the fast path for i40e_tx_map and i40e_clean_tx_irq so that they are similar to igb and ixgbe. - Only update the Tx descriptor ring in tx_map - Make skb mapping always on the first buffer in the chain - Drop the use of MAPPED_AS_PAGE Tx flag - Only store flags on the first buffer_info structure Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 220 +++++++++++--------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - 2 files changed, 122 insertions(+), 99 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e8db4dca6e85a..e96de7567856d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -189,27 +189,30 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id) } /** - * i40e_unmap_tx_resource - Release a Tx buffer + * i40e_unmap_and_free_tx_resource - Release a Tx buffer * @ring: the ring that owns the buffer * @tx_buffer: the buffer to free **/ -static inline void i40e_unmap_tx_resource(struct i40e_ring *ring, - struct i40e_tx_buffer *tx_buffer) +static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, + struct i40e_tx_buffer *tx_buffer) { - if (dma_unmap_len(tx_buffer, len)) { - if (tx_buffer->tx_flags & I40E_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(ring->dev, - dma_unmap_addr(tx_buffer, dma), - dma_unmap_len(tx_buffer, len), - DMA_TO_DEVICE); - else + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), dma_unmap_len(tx_buffer, len), DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); } - tx_buffer->time_stamp = 0; + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); + /* tx_buffer must be completely set up in the transmit path */ } /** @@ -218,7 +221,6 @@ static inline void i40e_unmap_tx_resource(struct i40e_ring *ring, **/ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) { - struct i40e_tx_buffer *tx_buffer; unsigned long bi_size; u16 i; @@ -227,13 +229,8 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* Free all the Tx ring sk_buffs */ - for (i = 0; i < tx_ring->count; i++) { - tx_buffer = &tx_ring->tx_bi[i]; - i40e_unmap_tx_resource(tx_ring, tx_buffer); - if (tx_buffer->skb) - dev_kfree_skb_any(tx_buffer->skb); - tx_buffer->skb = NULL; - } + for (i = 0; i < tx_ring->count; i++) + i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]); bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count; memset(tx_ring->tx_bi, 0, bi_size); @@ -332,16 +329,18 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) tx_buf = &tx_ring->tx_bi[i]; tx_desc = I40E_TX_DESC(tx_ring, i); + i -= tx_ring->count; - for (; budget; budget--) { - struct i40e_tx_desc *eop_desc; - - eop_desc = tx_buf->next_to_watch; + do { + struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; /* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break; + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + /* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz & cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) @@ -349,44 +348,67 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) /* clear next_to_watch to prevent false hangs */ tx_buf->next_to_watch = NULL; - tx_buf->time_stamp = 0; - /* set memory barrier before eop_desc is verified */ - rmb(); + /* update the statistics for this packet */ + total_bytes += tx_buf->bytecount; + total_packets += tx_buf->gso_segs; - do { - i40e_unmap_tx_resource(tx_ring, tx_buf); + /* free the skb */ + dev_kfree_skb_any(tx_buf->skb); - /* clear dtype status */ - tx_desc->cmd_type_offset_bsz &= - ~cpu_to_le64(I40E_TXD_QW1_DTYPE_MASK); + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); - if (likely(tx_desc == eop_desc)) { - eop_desc = NULL; + /* clear tx_buffer data */ + tx_buf->skb = NULL; + dma_unmap_len_set(tx_buf, len, 0); - dev_kfree_skb_any(tx_buf->skb); - tx_buf->skb = NULL; - - total_bytes += tx_buf->bytecount; - total_packets += tx_buf->gso_segs; - } + /* unmap remaining buffers */ + while (tx_desc != eop_desc) { tx_buf++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; + if (unlikely(!i)) { + i -= tx_ring->count; tx_buf = tx_ring->tx_bi; tx_desc = I40E_TX_DESC(tx_ring, 0); } - } while (eop_desc); - } + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buf, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buf++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buf = tx_ring->tx_bi; + tx_desc = I40E_TX_DESC(tx_ring, 0); + } + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + i += tx_ring->count; tx_ring->next_to_clean = i; tx_ring->tx_stats.bytes += total_bytes; tx_ring->tx_stats.packets += total_packets; tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; + if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) { /* schedule immediate reset if we believe we hung */ dev_info(tx_ring->dev, "Detected Tx Unit Hang\n" @@ -1515,68 +1537,71 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, struct i40e_tx_buffer *first, u32 tx_flags, const u8 hdr_len, u32 td_cmd, u32 td_offset) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; unsigned int data_len = skb->data_len; unsigned int size = skb_headlen(skb); - struct device *dev = tx_ring->dev; - u32 paylen = skb->len - hdr_len; - u16 i = tx_ring->next_to_use; + struct skb_frag_struct *frag; struct i40e_tx_buffer *tx_bi; struct i40e_tx_desc *tx_desc; - u32 buf_offset = 0; + u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; u16 gso_segs; - dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; - if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >> I40E_TX_FLAGS_VLAN_SHIFT; } + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) + gso_segs = skb_shinfo(skb)->gso_segs; + else + gso_segs = 1; + + /* multiply data chunks by size of headers */ + first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len); + first->gso_segs = gso_segs; + first->skb = skb; + first->tx_flags = tx_flags; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + tx_desc = I40E_TX_DESC(tx_ring, i); - for (;;) { - while (size > I40E_MAX_DATA_PER_TXD) { - tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset); + tx_bi = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_bi, len, size); + dma_unmap_addr_set(tx_bi, dma, dma); + + tx_desc->buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > I40E_MAX_DATA_PER_TXD)) { tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, I40E_MAX_DATA_PER_TXD, td_tag); - buf_offset += I40E_MAX_DATA_PER_TXD; - size -= I40E_MAX_DATA_PER_TXD; - tx_desc++; i++; if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); i = 0; } - } - tx_bi = &tx_ring->tx_bi[i]; - dma_unmap_len_set(tx_bi, len, buf_offset + size); - dma_unmap_addr_set(tx_bi, dma, dma); - tx_bi->tx_flags = tx_flags; + dma += I40E_MAX_DATA_PER_TXD; + size -= I40E_MAX_DATA_PER_TXD; - tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset); - tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, - size, td_tag); + tx_desc->buffer_addr = cpu_to_le64(dma); + } if (likely(!data_len)) break; - size = skb_frag_size(frag); - data_len -= size; - buf_offset = 0; - tx_flags |= I40E_TX_FLAGS_MAPPED_AS_PAGE; - - dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma)) - goto dma_error; + tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, + size, td_tag); tx_desc++; i++; @@ -1585,31 +1610,21 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, i = 0; } - frag++; - } - - tx_desc->cmd_type_offset_bsz |= - cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); - - i++; - if (i == tx_ring->count) - i = 0; + size = skb_frag_size(frag); + data_len -= size; - tx_ring->next_to_use = i; + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size, + DMA_TO_DEVICE); - if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) - gso_segs = skb_shinfo(skb)->gso_segs; - else - gso_segs = 1; + tx_bi = &tx_ring->tx_bi[i]; + } - /* multiply data chunks by size of headers */ - tx_bi->bytecount = paylen + (gso_segs * hdr_len); - tx_bi->gso_segs = gso_segs; - tx_bi->skb = skb; + tx_desc->cmd_type_offset_bsz = + build_ctob(td_cmd, td_offset, size, td_tag) | + cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); - /* set the timestamp and next to watch values */ + /* set the timestamp */ first->time_stamp = jiffies; - first->next_to_watch = tx_desc; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -1618,16 +1633,27 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, */ wmb(); + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + /* notify HW of packet */ writel(i, tx_ring->tail); + return; dma_error: - dev_info(dev, "TX DMA map failed\n"); + dev_info(tx_ring->dev, "TX DMA map failed\n"); /* clear dma mappings for failed tx_bi map */ for (;;) { tx_bi = &tx_ring->tx_bi[i]; - i40e_unmap_tx_resource(tx_ring, tx_bi); + i40e_unmap_and_free_tx_resource(tx_ring, tx_bi); if (tx_bi == first) break; if (i == 0) @@ -1635,8 +1661,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, i--; } - dev_kfree_skb_any(skb); - tx_ring->next_to_use = i; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 711f549384897..2cb1338fb7942 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -103,7 +103,6 @@ #define I40E_TX_FLAGS_FCCRC (u32)(1 << 6) #define I40E_TX_FLAGS_FSO (u32)(1 << 7) #define I40E_TX_FLAGS_TXSW (u32)(1 << 8) -#define I40E_TX_FLAGS_MAPPED_AS_PAGE (u32)(1 << 9) #define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 #define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 From b194130627580519e63665660db055bfe82b6949 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:32 +0000 Subject: [PATCH 06/13] i40e: Drop dead code and flags from Tx hotpath Drop Tx flag and TXSW which is tested but never set. As a result of this change we can drop a complicated check that always resulted in the final result of i40e_tx_csum being equal to the CHECKSUM_PARTIAL value. As such we can replace the entire function call with just a check for skb->summed == CHECKSUM_PARTIAL. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 31 ++++----------------- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 - 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index e96de7567856d..52dfac206f755 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1299,27 +1299,6 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, return 0; } -/** - * i40e_tx_csum - is checksum offload requested - * @tx_ring: ptr to the ring to send - * @skb: ptr to the skb we're sending - * @tx_flags: the collected send information - * @protocol: the send protocol - * - * Returns true if checksum offload is requested - **/ -static bool i40e_tx_csum(struct i40e_ring *tx_ring, struct sk_buff *skb, - u32 tx_flags, __be16 protocol) -{ - if ((skb->ip_summed != CHECKSUM_PARTIAL) && - !(tx_flags & I40E_TX_FLAGS_TXSW)) { - if (!(tx_flags & I40E_TX_FLAGS_HW_VLAN)) - return false; - } - - return skb->ip_summed == CHECKSUM_PARTIAL; -} - /** * i40e_tso - set up the tso context descriptor * @tx_ring: ptr to the ring to send @@ -1785,16 +1764,16 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); + /* always enable CRC insertion offload */ + td_cmd |= I40E_TX_DESC_CMD_ICRC; + /* Always offload the checksum, since it's in the data descriptor */ - if (i40e_tx_csum(tx_ring, skb, tx_flags, protocol)) + if (skb->ip_summed == CHECKSUM_PARTIAL) { tx_flags |= I40E_TX_FLAGS_CSUM; - /* always enable offload insertion */ - td_cmd |= I40E_TX_DESC_CMD_ICRC; - - if (tx_flags & I40E_TX_FLAGS_CSUM) i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset, tx_ring, &cd_tunneling); + } i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss, cd_tunneling, cd_l2tag2); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 2cb1338fb7942..e5142476a7f0f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -102,7 +102,6 @@ #define I40E_TX_FLAGS_IPV6 (u32)(1 << 5) #define I40E_TX_FLAGS_FCCRC (u32)(1 << 6) #define I40E_TX_FLAGS_FSO (u32)(1 << 7) -#define I40E_TX_FLAGS_TXSW (u32)(1 << 8) #define I40E_TX_FLAGS_VLAN_MASK 0xffff0000 #define I40E_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 #define I40E_TX_FLAGS_VLAN_PRIO_SHIFT 29 From 7070ce0a6419a118842298bc967061ad6cea40db Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:37 +0000 Subject: [PATCH 07/13] i40e: Add support for Tx byte queue limits Implement BQL (byte queue limit) support in i40e. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 52dfac206f755..ad2818f26821e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -240,6 +240,13 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; + + if (!tx_ring->netdev) + return; + + /* cleanup Tx queue statistics */ + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index)); } /** @@ -436,6 +443,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) return true; } + netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + total_packets, total_bytes); + #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -1602,6 +1613,10 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, build_ctob(td_cmd, td_offset, size, td_tag) | cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT); + netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->queue_index), + first->bytecount); + /* set the timestamp */ first->time_stamp = jiffies; From a114d0a6aca7f96f46be93539665dbb28bdf1a73 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:43 +0000 Subject: [PATCH 08/13] i40e: Split bytes and packets from Rx/Tx stats This makes it so that the Tx and Rx byte and packet counts are separated from the rest of the statistics. This allows for better isolation of these stats when we move them into the 64 bit statistics. Simplify things by re-ordering how the stats display in ethtool. Instead of displaying all of the Tx queues as a block, followed by all the Rx queues, the new order is Tx[0], Rx[0], Tx[1], Rx[1], ..., Tx[n], Rx[n]. This reduces the loops and cleans up the display for testing purposes since it is very easy to verify if flow director is doing the right thing as the Tx and Rx queue pair are shown in pairs. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 8 ++++---- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 14 +++++--------- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 ++++++++---- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 12 ++++++------ drivers/net/ethernet/intel/i40e/i40e_txrx.h | 8 +++++--- 5 files changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 0b01c61f70c22..2b6655bf1f3d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -512,8 +512,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->rx_rings[i].ring_active); dev_info(&pf->pdev->dev, " rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n", - i, vsi->rx_rings[i].rx_stats.packets, - vsi->rx_rings[i].rx_stats.bytes, + i, vsi->rx_rings[i].stats.packets, + vsi->rx_rings[i].stats.bytes, vsi->rx_rings[i].rx_stats.non_eop_descs); dev_info(&pf->pdev->dev, " rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n", @@ -556,8 +556,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) vsi->tx_rings[i].ring_active); dev_info(&pf->pdev->dev, " tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n", - i, vsi->tx_rings[i].tx_stats.packets, - vsi->tx_rings[i].tx_stats.bytes, + i, vsi->tx_rings[i].stats.packets, + vsi->tx_rings[i].stats.bytes, vsi->tx_rings[i].tx_stats.restart_queue); dev_info(&pf->pdev->dev, " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 9a76b8cec76c0..8754c6fa63247 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -587,13 +587,11 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } - for (j = 0; j < vsi->num_queue_pairs; j++) { - data[i++] = vsi->tx_rings[j].tx_stats.packets; - data[i++] = vsi->tx_rings[j].tx_stats.bytes; - } - for (j = 0; j < vsi->num_queue_pairs; j++) { - data[i++] = vsi->rx_rings[j].rx_stats.packets; - data[i++] = vsi->rx_rings[j].rx_stats.bytes; + for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) { + data[i] = vsi->tx_rings[j].stats.packets; + data[i + 1] = vsi->tx_rings[j].stats.bytes; + data[i + 2] = vsi->rx_rings[j].stats.packets; + data[i + 3] = vsi->rx_rings[j].stats.bytes; } if (vsi == pf->vsi[pf->lan_vsi]) { for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) { @@ -641,8 +639,6 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i); p += ETH_GSTRING_LEN; - } - for (i = 0; i < vsi->num_queue_pairs; i++) { snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 657babe82c9c6..d1b5bae15ea8d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -376,8 +376,12 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi) memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets)); if (vsi->rx_rings) for (i = 0; i < vsi->num_queue_pairs; i++) { + memset(&vsi->rx_rings[i].stats, 0 , + sizeof(vsi->rx_rings[i].stats)); memset(&vsi->rx_rings[i].rx_stats, 0 , sizeof(vsi->rx_rings[i].rx_stats)); + memset(&vsi->tx_rings[i].stats, 0 , + sizeof(vsi->tx_rings[i].stats)); memset(&vsi->tx_rings[i].tx_stats, 0, sizeof(vsi->tx_rings[i].tx_stats)); } @@ -708,14 +712,14 @@ void i40e_update_stats(struct i40e_vsi *vsi) struct i40e_ring *p; p = &vsi->rx_rings[q]; - rx_b += p->rx_stats.bytes; - rx_p += p->rx_stats.packets; + rx_b += p->stats.bytes; + rx_p += p->stats.packets; rx_buf += p->rx_stats.alloc_rx_buff_failed; rx_page += p->rx_stats.alloc_rx_page_failed; p = &vsi->tx_rings[q]; - tx_b += p->tx_stats.bytes; - tx_p += p->tx_stats.packets; + tx_b += p->stats.bytes; + tx_p += p->stats.packets; tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ad2818f26821e..3e73bc093737b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -305,14 +305,14 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) && + if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } else { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets; + tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } @@ -411,8 +411,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) i += tx_ring->count; tx_ring->next_to_clean = i; - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; + tx_ring->stats.bytes += total_bytes; + tx_ring->stats.packets += total_packets; tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; @@ -1075,8 +1075,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) } rx_ring->next_to_clean = i; - rx_ring->rx_stats.packets += total_rx_packets; - rx_ring->rx_stats.bytes += total_rx_bytes; + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index e5142476a7f0f..7f3f7e3e4238b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -126,17 +126,18 @@ struct i40e_rx_buffer { unsigned int page_offset; }; -struct i40e_tx_queue_stats { +struct i40e_queue_stats { u64 packets; u64 bytes; +}; + +struct i40e_tx_queue_stats { u64 restart_queue; u64 tx_busy; u64 tx_done_old; }; struct i40e_rx_queue_stats { - u64 packets; - u64 bytes; u64 non_eop_descs; u64 alloc_rx_page_failed; u64 alloc_rx_buff_failed; @@ -215,6 +216,7 @@ struct i40e_ring { bool ring_active; /* is ring online or not */ /* stats structs */ + struct i40e_queue_stats stats; union { struct i40e_tx_queue_stats tx_stats; struct i40e_rx_queue_stats rx_stats; From 493fb30011b3ab5173cef96f1d1ce126da051792 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 07:01:44 +0000 Subject: [PATCH 09/13] i40e: Move q_vectors from pointer to array to array of pointers Allocate the q_vectors individually. The advantage to this is that it allows for easier freeing and allocation. In addition it makes it so that we could do node specific allocations at some point in the future. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 5 +- .../net/ethernet/intel/i40e/i40e_debugfs.c | 11 +- .../net/ethernet/intel/i40e/i40e_ethtool.c | 4 +- drivers/net/ethernet/intel/i40e/i40e_main.c | 152 ++++++++++++------ 4 files changed, 112 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index b5252eb8a6c75..789304e43a0e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -366,7 +366,7 @@ struct i40e_vsi { u8 dtype; /* List of q_vectors allocated to this VSI */ - struct i40e_q_vector *q_vectors; + struct i40e_q_vector **q_vectors; int num_q_vectors; int base_vector; @@ -422,8 +422,9 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ - char name[IFNAMSIZ + 9]; cpumask_t affinity_mask; + struct rcu_head rcu; /* to avoid race with update stats on free */ + char name[IFNAMSIZ + 9]; } ____cacheline_internodealigned_in_smp; /* lan device */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 2b6655bf1f3d1..44e3fa43af619 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -586,15 +586,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n", vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype); - if (vsi->q_vectors) { - for (i = 0; i < vsi->num_q_vectors; i++) { - dev_info(&pf->pdev->dev, - " q_vectors[%i]: base index = %ld\n", - i, ((long int)*vsi->q_vectors[i].rx.ring- - (long int)*vsi->q_vectors[0].rx.ring)/ - sizeof(struct i40e_ring)); - } - } dev_info(&pf->pdev->dev, " num_q_vectors = %i, base_vector = %i\n", vsi->num_q_vectors, vsi->base_vector); @@ -1995,7 +1986,7 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, goto netdev_ops_write_done; } for (i = 0; i < vsi->num_q_vectors; i++) - napi_schedule(&vsi->q_vectors[i].napi); + napi_schedule(&vsi->q_vectors[i]->napi); dev_info(&pf->pdev->dev, "napi called\n"); } else { dev_info(&pf->pdev->dev, "unknown command '%s'\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 8754c6fa63247..bf607dabe2363 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -906,8 +906,8 @@ static int i40e_set_coalesce(struct net_device *netdev, } vector = vsi->base_vector; - q_vector = vsi->q_vectors; - for (i = 0; i < vsi->num_q_vectors; i++, vector++, q_vector++) { + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + q_vector = vsi->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr); q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d1b5bae15ea8d..a090815a6dd97 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2358,8 +2358,8 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi) */ qp = vsi->base_queue; vector = vsi->base_vector; - q_vector = vsi->q_vectors; - for (i = 0; i < vsi->num_q_vectors; i++, q_vector++, vector++) { + for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + q_vector = vsi->q_vectors[i]; q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); q_vector->rx.latency_range = I40E_LOW_LATENCY; wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), @@ -2439,7 +2439,7 @@ static void i40e_enable_misc_int_causes(struct i40e_hw *hw) **/ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi) { - struct i40e_q_vector *q_vector = vsi->q_vectors; + struct i40e_q_vector *q_vector = vsi->q_vectors[0]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; u32 val; @@ -2558,7 +2558,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int vector, err; for (vector = 0; vector < q_vectors; vector++) { - struct i40e_q_vector *q_vector = &(vsi->q_vectors[vector]); + struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; if (q_vector->tx.ring[0] && q_vector->rx.ring[0]) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -2709,7 +2709,7 @@ static irqreturn_t i40e_intr(int irq, void *data) i40e_flush(hw); if (!test_bit(__I40E_DOWN, &pf->state)) - napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0].napi); + napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0]->napi); } if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { @@ -2785,7 +2785,7 @@ static irqreturn_t i40e_intr(int irq, void *data) **/ static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx) { - struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]); + struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *rx_ring = &(vsi->rx_rings[r_idx]); rx_ring->q_vector = q_vector; @@ -2803,7 +2803,7 @@ static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx) **/ static void map_vector_to_txq(struct i40e_vsi *vsi, int v_idx, int t_idx) { - struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]); + struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *tx_ring = &(vsi->tx_rings[t_idx]); tx_ring->q_vector = q_vector; @@ -2891,7 +2891,7 @@ static void i40e_netpoll(struct net_device *netdev) pf->flags |= I40E_FLAG_IN_NETPOLL; if (pf->flags & I40E_FLAG_MSIX_ENABLED) { for (i = 0; i < vsi->num_q_vectors; i++) - i40e_msix_clean_rings(0, &vsi->q_vectors[i]); + i40e_msix_clean_rings(0, vsi->q_vectors[i]); } else { i40e_intr(pf->pdev->irq, netdev); } @@ -3077,14 +3077,14 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) u16 vector = i + base; /* free only the irqs that were actually requested */ - if (vsi->q_vectors[i].num_ringpairs == 0) + if (vsi->q_vectors[i]->num_ringpairs == 0) continue; /* clear the affinity_mask in the IRQ descriptor */ irq_set_affinity_hint(pf->msix_entries[vector].vector, NULL); free_irq(pf->msix_entries[vector].vector, - &vsi->q_vectors[i]); + vsi->q_vectors[i]); /* Tear down the interrupt queue link list * @@ -3167,6 +3167,38 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) } } +/** + * i40e_free_q_vector - Free memory allocated for specific interrupt vector + * @vsi: the VSI being configured + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx) +{ + struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; + int r_idx; + + if (!q_vector) + return; + + /* disassociate q_vector from rings */ + for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++) + q_vector->tx.ring[r_idx]->q_vector = NULL; + for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++) + q_vector->rx.ring[r_idx]->q_vector = NULL; + + /* only VSI w/ an associated netdev is set up w/ NAPI */ + if (vsi->netdev) + netif_napi_del(&q_vector->napi); + + vsi->q_vectors[v_idx] = NULL; + + kfree_rcu(q_vector, rcu); +} + /** * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors * @vsi: the VSI being un-configured @@ -3178,24 +3210,8 @@ static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi) { int v_idx; - for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) { - struct i40e_q_vector *q_vector = &vsi->q_vectors[v_idx]; - int r_idx; - - if (!q_vector) - continue; - - /* disassociate q_vector from rings */ - for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++) - q_vector->tx.ring[r_idx]->q_vector = NULL; - for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++) - q_vector->rx.ring[r_idx]->q_vector = NULL; - - /* only VSI w/ an associated netdev is set up w/ NAPI */ - if (vsi->netdev) - netif_napi_del(&q_vector->napi); - } - kfree(vsi->q_vectors); + for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) + i40e_free_q_vector(vsi, v_idx); } /** @@ -3245,7 +3261,7 @@ static void i40e_napi_enable_all(struct i40e_vsi *vsi) return; for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_enable(&vsi->q_vectors[q_idx].napi); + napi_enable(&vsi->q_vectors[q_idx]->napi); } /** @@ -3260,7 +3276,7 @@ static void i40e_napi_disable_all(struct i40e_vsi *vsi) return; for (q_idx = 0; q_idx < vsi->num_q_vectors; q_idx++) - napi_disable(&vsi->q_vectors[q_idx].napi); + napi_disable(&vsi->q_vectors[q_idx]->napi); } /** @@ -4945,6 +4961,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) { int ret = -ENODEV; struct i40e_vsi *vsi; + int sz_vectors; int vsi_idx; int i; @@ -4970,14 +4987,14 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) vsi_idx = i; /* Found one! */ } else { ret = -ENODEV; - goto err_alloc_vsi; /* out of VSI slots! */ + goto unlock_pf; /* out of VSI slots! */ } pf->next_vsi = ++i; vsi = kzalloc(sizeof(*vsi), GFP_KERNEL); if (!vsi) { ret = -ENOMEM; - goto err_alloc_vsi; + goto unlock_pf; } vsi->type = type; vsi->back = pf; @@ -4992,12 +5009,25 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) i40e_set_num_rings_in_vsi(vsi); + /* allocate memory for q_vector pointers */ + sz_vectors = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors; + vsi->q_vectors = kzalloc(sz_vectors, GFP_KERNEL); + if (!vsi->q_vectors) { + ret = -ENOMEM; + goto err_vectors; + } + /* Setup default MSIX irq handler for VSI */ i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings); pf->vsi[vsi_idx] = vsi; ret = vsi_idx; -err_alloc_vsi: + goto unlock_pf; + +err_vectors: + pf->next_vsi = i - 1; + kfree(vsi); +unlock_pf: mutex_unlock(&pf->switch_mutex); return ret; } @@ -5038,6 +5068,9 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx); i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx); + /* free the ring and vector containers */ + kfree(vsi->q_vectors); + pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) pf->next_vsi = vsi->idx; @@ -5256,6 +5289,35 @@ static int i40e_init_msix(struct i40e_pf *pf) return err; } +/** + * i40e_alloc_q_vector - Allocate memory for a single interrupt vector + * @vsi: the VSI being configured + * @v_idx: index of the vector in the vsi struct + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + **/ +static int i40e_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) +{ + struct i40e_q_vector *q_vector; + + /* allocate q_vector */ + q_vector = kzalloc(sizeof(struct i40e_q_vector), GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + + q_vector->vsi = vsi; + q_vector->v_idx = v_idx; + cpumask_set_cpu(v_idx, &q_vector->affinity_mask); + if (vsi->netdev) + netif_napi_add(vsi->netdev, &q_vector->napi, + i40e_napi_poll, vsi->work_limit); + + /* tie q_vector and vsi together */ + vsi->q_vectors[v_idx] = q_vector; + + return 0; +} + /** * i40e_alloc_q_vectors - Allocate memory for interrupt vectors * @vsi: the VSI being configured @@ -5267,6 +5329,7 @@ static int i40e_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int v_idx, num_q_vectors; + int err; /* if not MSIX, give the one vector only to the LAN VSI */ if (pf->flags & I40E_FLAG_MSIX_ENABLED) @@ -5276,22 +5339,19 @@ static int i40e_alloc_q_vectors(struct i40e_vsi *vsi) else return -EINVAL; - vsi->q_vectors = kcalloc(num_q_vectors, - sizeof(struct i40e_q_vector), - GFP_KERNEL); - if (!vsi->q_vectors) - return -ENOMEM; - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - vsi->q_vectors[v_idx].vsi = vsi; - vsi->q_vectors[v_idx].v_idx = v_idx; - cpumask_set_cpu(v_idx, &vsi->q_vectors[v_idx].affinity_mask); - if (vsi->netdev) - netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx].napi, - i40e_napi_poll, vsi->work_limit); + err = i40e_alloc_q_vector(vsi, v_idx); + if (err) + goto err_out; } return 0; + +err_out: + while (v_idx--) + i40e_free_q_vector(vsi, v_idx); + + return err; } /** @@ -5958,7 +6018,7 @@ static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi) int ret = -ENOENT; struct i40e_pf *pf = vsi->back; - if (vsi->q_vectors) { + if (vsi->q_vectors[0]) { dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n", vsi->seid); return -EEXIST; From cd0b6fa65692a2bc150c3228008b812b1f45aed0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:53 +0000 Subject: [PATCH 10/13] i40e: Replace ring container array with linked list This replaces the ring container array with a linked list. The idea is to make the logic much easier to deal with since this will allow us to call a simple helper function from the q_vectors to go through the entire list. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 84 ++++++++++----------- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 19 ++--- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 8 +- 3 files changed, 58 insertions(+), 53 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a090815a6dd97..c74ac585c6395 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2516,7 +2516,7 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data) { struct i40e_q_vector *q_vector = data; - if (!q_vector->tx.ring[0] && !q_vector->rx.ring[0]) + if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; napi_schedule(&q_vector->napi); @@ -2533,7 +2533,7 @@ static irqreturn_t i40e_fdir_clean_rings(int irq, void *data) { struct i40e_q_vector *q_vector = data; - if (!q_vector->tx.ring[0] && !q_vector->rx.ring[0]) + if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED; pr_info("fdir ring cleaning needed\n"); @@ -2560,14 +2560,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; - if (q_vector->tx.ring[0] && q_vector->rx.ring[0]) { + if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); tx_int_idx++; - } else if (q_vector->rx.ring[0]) { + } else if (q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "rx", rx_int_idx++); - } else if (q_vector->tx.ring[0]) { + } else if (q_vector->tx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "tx", tx_int_idx++); } else { @@ -2778,40 +2778,26 @@ static irqreturn_t i40e_intr(int irq, void *data) } /** - * i40e_map_vector_to_rxq - Assigns the Rx queue to the vector + * i40e_map_vector_to_qp - Assigns the queue pair to the vector * @vsi: the VSI being configured * @v_idx: vector index - * @r_idx: rx queue index + * @qp_idx: queue pair index **/ -static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx) +static void map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct i40e_ring *rx_ring = &(vsi->rx_rings[r_idx]); - - rx_ring->q_vector = q_vector; - q_vector->rx.ring[q_vector->rx.count] = rx_ring; - q_vector->rx.count++; - q_vector->rx.latency_range = I40E_LOW_LATENCY; - q_vector->vsi = vsi; -} - -/** - * i40e_map_vector_to_txq - Assigns the Tx queue to the vector - * @vsi: the VSI being configured - * @v_idx: vector index - * @t_idx: tx queue index - **/ -static void map_vector_to_txq(struct i40e_vsi *vsi, int v_idx, int t_idx) -{ - struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct i40e_ring *tx_ring = &(vsi->tx_rings[t_idx]); + struct i40e_ring *tx_ring = &(vsi->tx_rings[qp_idx]); + struct i40e_ring *rx_ring = &(vsi->rx_rings[qp_idx]); tx_ring->q_vector = q_vector; - q_vector->tx.ring[q_vector->tx.count] = tx_ring; + tx_ring->next = q_vector->tx.ring; + q_vector->tx.ring = tx_ring; q_vector->tx.count++; - q_vector->tx.latency_range = I40E_LOW_LATENCY; - q_vector->num_ringpairs++; - q_vector->vsi = vsi; + + rx_ring->q_vector = q_vector; + rx_ring->next = q_vector->rx.ring; + q_vector->rx.ring = rx_ring; + q_vector->rx.count++; } /** @@ -2827,7 +2813,7 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) { int qp_remaining = vsi->num_queue_pairs; int q_vectors = vsi->num_q_vectors; - int qp_per_vector; + int num_ringpairs; int v_start = 0; int qp_idx = 0; @@ -2835,11 +2821,21 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi) * group them so there are multiple queues per vector. */ for (; v_start < q_vectors && qp_remaining; v_start++) { - qp_per_vector = DIV_ROUND_UP(qp_remaining, q_vectors - v_start); - for (; qp_per_vector; - qp_per_vector--, qp_idx++, qp_remaining--) { - map_vector_to_rxq(vsi, v_start, qp_idx); - map_vector_to_txq(vsi, v_start, qp_idx); + struct i40e_q_vector *q_vector = vsi->q_vectors[v_start]; + + num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start); + + q_vector->num_ringpairs = num_ringpairs; + + q_vector->rx.count = 0; + q_vector->tx.count = 0; + q_vector->rx.ring = NULL; + q_vector->tx.ring = NULL; + + while (num_ringpairs--) { + map_vector_to_qp(vsi, v_start, qp_idx); + qp_idx++; + qp_remaining--; } } } @@ -3179,16 +3175,17 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; - int r_idx; + struct i40e_ring *ring; if (!q_vector) return; /* disassociate q_vector from rings */ - for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++) - q_vector->tx.ring[r_idx]->q_vector = NULL; - for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++) - q_vector->rx.ring[r_idx]->q_vector = NULL; + i40e_for_each_ring(ring, q_vector->tx) + ring->q_vector = NULL; + + i40e_for_each_ring(ring, q_vector->rx) + ring->q_vector = NULL; /* only VSI w/ an associated netdev is set up w/ NAPI */ if (vsi->netdev) @@ -5312,6 +5309,9 @@ static int i40e_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll, vsi->work_limit); + q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->tx.latency_range = I40E_LOW_LATENCY; + /* tie q_vector and vsi together */ vsi->q_vectors[v_idx] = q_vector; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3e73bc093737b..f153f3770346d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1100,27 +1100,28 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) struct i40e_q_vector *q_vector = container_of(napi, struct i40e_q_vector, napi); struct i40e_vsi *vsi = q_vector->vsi; + struct i40e_ring *ring; bool clean_complete = true; int budget_per_ring; - int i; if (test_bit(__I40E_DOWN, &vsi->state)) { napi_complete(napi); return 0; } + /* Since the actual Tx work is minimal, we can give the Tx a larger + * budget and be more aggressive about cleaning up the Tx descriptors. + */ + i40e_for_each_ring(ring, q_vector->tx) + clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit); + /* We attempt to distribute budget to each Rx queue fairly, but don't * allow the budget to go below 1 because that would exit polling early. - * Since the actual Tx work is minimal, we can give the Tx a larger - * budget and be more aggressive about cleaning up the Tx descriptors. */ budget_per_ring = max(budget/q_vector->num_ringpairs, 1); - for (i = 0; i < q_vector->num_ringpairs; i++) { - clean_complete &= i40e_clean_tx_irq(q_vector->tx.ring[i], - vsi->work_limit); - clean_complete &= i40e_clean_rx_irq(q_vector->rx.ring[i], - budget_per_ring); - } + + i40e_for_each_ring(ring, q_vector->rx) + clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring); /* If work not completed, return budget and polling will return */ if (!clean_complete) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 7f3f7e3e4238b..c2a6746a5ec90 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -180,6 +180,7 @@ enum i40e_ring_state_t { /* struct that defines a descriptor ring, associated with a VSI */ struct i40e_ring { + struct i40e_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ @@ -236,9 +237,8 @@ enum i40e_latency_range { }; struct i40e_ring_container { -#define I40E_MAX_RINGPAIR_PER_VECTOR 8 /* array of pointers to rings */ - struct i40e_ring *ring[I40E_MAX_RINGPAIR_PER_VECTOR]; + struct i40e_ring *ring; unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_packets; /* total packets processed this int */ u16 count; @@ -246,6 +246,10 @@ struct i40e_ring_container { u16 itr; }; +/* iterator for handling rings in ring container */ +#define i40e_for_each_ring(pos, head) \ + for (pos = (head).ring; pos != NULL; pos = pos->next) + void i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); From 9f65e15b4f982391eef795a74adcc6580f0d7c53 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:00:58 +0000 Subject: [PATCH 11/13] i40e: Move rings from pointer to array to array of pointers Allocate the queue pairs individually instead of as a group. This allows for much easier queue management as it is possible to dynamically resize the queues without having to free and allocate the entire block. Ease statistic collection by treating Tx/Rx queue pairs as a single unit. Each pair is allocated together and starts with a Tx queue and ends with an Rx queue. By ordering them this way it is possible to know the Rx offset based on a pointer to the Tx queue. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 6 +- .../net/ethernet/intel/i40e/i40e_debugfs.c | 195 +++++++++--------- .../net/ethernet/intel/i40e/i40e_ethtool.c | 40 ++-- drivers/net/ethernet/intel/i40e/i40e_main.c | 142 +++++++------ drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 +- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 + 6 files changed, 204 insertions(+), 185 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 789304e43a0e8..c06a76ca9aaa9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -347,9 +347,9 @@ struct i40e_vsi { u32 rx_buf_failed; u32 rx_page_failed; - /* These are arrays of rings, allocated at run-time */ - struct i40e_ring *rx_rings; - struct i40e_ring *tx_rings; + /* These are containers of ring pointers, allocated at run-time */ + struct i40e_ring **rx_rings; + struct i40e_ring **tx_rings; u16 work_limit; /* high bit set means dynamic, use accessor routines to read/write. diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 44e3fa43af619..19e248ff6c77c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -258,12 +258,12 @@ static ssize_t i40e_dbg_dump_write(struct file *filp, for (i = 0; i < vsi->num_queue_pairs; i++) { len = sizeof(struct i40e_tx_buffer); - memcpy(p, vsi->tx_rings[i].tx_bi, len); + memcpy(p, vsi->tx_rings[i]->tx_bi, len); p += len; } for (i = 0; i < vsi->num_queue_pairs; i++) { len = sizeof(struct i40e_rx_buffer); - memcpy(p, vsi->rx_rings[i].rx_bi, len); + memcpy(p, vsi->rx_rings[i]->rx_bi, len); p += len; } @@ -484,99 +484,104 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n", vsi->tx_restart, vsi->tx_busy, vsi->rx_buf_failed, vsi->rx_page_failed); - if (vsi->rx_rings) { - for (i = 0; i < vsi->num_queue_pairs; i++) { - dev_info(&pf->pdev->dev, - " rx_rings[%i]: desc = %p\n", - i, vsi->rx_rings[i].desc); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n", - i, vsi->rx_rings[i].dev, - vsi->rx_rings[i].netdev, - vsi->rx_rings[i].rx_bi); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, vsi->rx_rings[i].state, - vsi->rx_rings[i].queue_index, - vsi->rx_rings[i].reg_idx); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n", - i, vsi->rx_rings[i].rx_hdr_len, - vsi->rx_rings[i].rx_buf_len, - vsi->rx_rings[i].dtype); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, vsi->rx_rings[i].hsplit, - vsi->rx_rings[i].next_to_use, - vsi->rx_rings[i].next_to_clean, - vsi->rx_rings[i].ring_active); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n", - i, vsi->rx_rings[i].stats.packets, - vsi->rx_rings[i].stats.bytes, - vsi->rx_rings[i].rx_stats.non_eop_descs); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n", - i, - vsi->rx_rings[i].rx_stats.alloc_rx_page_failed, - vsi->rx_rings[i].rx_stats.alloc_rx_buff_failed); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, vsi->rx_rings[i].size, - (long unsigned int)vsi->rx_rings[i].dma); - dev_info(&pf->pdev->dev, - " rx_rings[%i]: vsi = %p, q_vector = %p\n", - i, vsi->rx_rings[i].vsi, - vsi->rx_rings[i].q_vector); - } + rcu_read_lock(); + for (i = 0; i < vsi->num_queue_pairs; i++) { + struct i40e_ring *rx_ring = ACCESS_ONCE(vsi->rx_rings[i]); + if (!rx_ring) + continue; + + dev_info(&pf->pdev->dev, + " rx_rings[%i]: desc = %p\n", + i, rx_ring->desc); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n", + i, rx_ring->dev, + rx_ring->netdev, + rx_ring->rx_bi); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", + i, rx_ring->state, + rx_ring->queue_index, + rx_ring->reg_idx); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n", + i, rx_ring->rx_hdr_len, + rx_ring->rx_buf_len, + rx_ring->dtype); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", + i, rx_ring->hsplit, + rx_ring->next_to_use, + rx_ring->next_to_clean, + rx_ring->ring_active); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n", + i, rx_ring->stats.packets, + rx_ring->stats.bytes, + rx_ring->rx_stats.non_eop_descs); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n", + i, + rx_ring->rx_stats.alloc_rx_page_failed, + rx_ring->rx_stats.alloc_rx_buff_failed); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: size = %i, dma = 0x%08lx\n", + i, rx_ring->size, + (long unsigned int)rx_ring->dma); + dev_info(&pf->pdev->dev, + " rx_rings[%i]: vsi = %p, q_vector = %p\n", + i, rx_ring->vsi, + rx_ring->q_vector); } - if (vsi->tx_rings) { - for (i = 0; i < vsi->num_queue_pairs; i++) { - dev_info(&pf->pdev->dev, - " tx_rings[%i]: desc = %p\n", - i, vsi->tx_rings[i].desc); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n", - i, vsi->tx_rings[i].dev, - vsi->tx_rings[i].netdev, - vsi->tx_rings[i].tx_bi); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", - i, vsi->tx_rings[i].state, - vsi->tx_rings[i].queue_index, - vsi->tx_rings[i].reg_idx); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: dtype = %d\n", - i, vsi->tx_rings[i].dtype); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", - i, vsi->tx_rings[i].hsplit, - vsi->tx_rings[i].next_to_use, - vsi->tx_rings[i].next_to_clean, - vsi->tx_rings[i].ring_active); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n", - i, vsi->tx_rings[i].stats.packets, - vsi->tx_rings[i].stats.bytes, - vsi->tx_rings[i].tx_stats.restart_queue); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n", - i, - vsi->tx_rings[i].tx_stats.tx_busy, - vsi->tx_rings[i].tx_stats.tx_done_old); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: size = %i, dma = 0x%08lx\n", - i, vsi->tx_rings[i].size, - (long unsigned int)vsi->tx_rings[i].dma); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: vsi = %p, q_vector = %p\n", - i, vsi->tx_rings[i].vsi, - vsi->tx_rings[i].q_vector); - dev_info(&pf->pdev->dev, - " tx_rings[%i]: DCB tc = %d\n", - i, vsi->tx_rings[i].dcb_tc); - } + for (i = 0; i < vsi->num_queue_pairs; i++) { + struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + if (!tx_ring) + continue; + dev_info(&pf->pdev->dev, + " tx_rings[%i]: desc = %p\n", + i, tx_ring->desc); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n", + i, tx_ring->dev, + tx_ring->netdev, + tx_ring->tx_bi); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n", + i, tx_ring->state, + tx_ring->queue_index, + tx_ring->reg_idx); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: dtype = %d\n", + i, tx_ring->dtype); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n", + i, tx_ring->hsplit, + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_ring->ring_active); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n", + i, tx_ring->stats.packets, + tx_ring->stats.bytes, + tx_ring->tx_stats.restart_queue); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: tx_stats: tx_busy = %lld, tx_done_old = %lld\n", + i, + tx_ring->tx_stats.tx_busy, + tx_ring->tx_stats.tx_done_old); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: size = %i, dma = 0x%08lx\n", + i, tx_ring->size, + (long unsigned int)tx_ring->dma); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: vsi = %p, q_vector = %p\n", + i, tx_ring->vsi, + tx_ring->q_vector); + dev_info(&pf->pdev->dev, + " tx_rings[%i]: DCB tc = %d\n", + i, tx_ring->dcb_tc); } + rcu_read_unlock(); dev_info(&pf->pdev->dev, " work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n", vsi->work_limit, vsi->rx_itr_setting, @@ -782,9 +787,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, return; } if (is_rx_ring) - ring = vsi->rx_rings[ring_id]; + ring = *vsi->rx_rings[ring_id]; else - ring = vsi->tx_rings[ring_id]; + ring = *vsi->tx_rings[ring_id]; if (cnt == 2) { dev_info(&pf->pdev->dev, "vsi = %02i %s ring = %02i\n", vsi_seid, is_rx_ring ? "rx" : "tx", ring_id); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index bf607dabe2363..50153ea2d63c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -399,8 +399,8 @@ static void i40e_get_ringparam(struct net_device *netdev, ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS; ring->rx_mini_max_pending = 0; ring->rx_jumbo_max_pending = 0; - ring->rx_pending = vsi->rx_rings[0].count; - ring->tx_pending = vsi->tx_rings[0].count; + ring->rx_pending = vsi->rx_rings[0]->count; + ring->tx_pending = vsi->tx_rings[0]->count; ring->rx_mini_pending = 0; ring->rx_jumbo_pending = 0; } @@ -429,8 +429,8 @@ static int i40e_set_ringparam(struct net_device *netdev, new_rx_count = ALIGN(new_rx_count, I40E_REQ_DESCRIPTOR_MULTIPLE); /* if nothing to do return success */ - if ((new_tx_count == vsi->tx_rings[0].count) && - (new_rx_count == vsi->rx_rings[0].count)) + if ((new_tx_count == vsi->tx_rings[0]->count) && + (new_rx_count == vsi->rx_rings[0]->count)) return 0; while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state)) @@ -439,8 +439,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (!netif_running(vsi->netdev)) { /* simple case - set for the next time the netdev is started */ for (i = 0; i < vsi->num_queue_pairs; i++) { - vsi->tx_rings[i].count = new_tx_count; - vsi->rx_rings[i].count = new_rx_count; + vsi->tx_rings[i]->count = new_tx_count; + vsi->rx_rings[i]->count = new_rx_count; } goto done; } @@ -451,10 +451,10 @@ static int i40e_set_ringparam(struct net_device *netdev, */ /* alloc updated Tx resources */ - if (new_tx_count != vsi->tx_rings[0].count) { + if (new_tx_count != vsi->tx_rings[0]->count) { netdev_info(netdev, "Changing Tx descriptor count from %d to %d.\n", - vsi->tx_rings[0].count, new_tx_count); + vsi->tx_rings[0]->count, new_tx_count); tx_rings = kcalloc(vsi->alloc_queue_pairs, sizeof(struct i40e_ring), GFP_KERNEL); if (!tx_rings) { @@ -464,7 +464,7 @@ static int i40e_set_ringparam(struct net_device *netdev, for (i = 0; i < vsi->num_queue_pairs; i++) { /* clone ring and setup updated count */ - tx_rings[i] = vsi->tx_rings[i]; + tx_rings[i] = *vsi->tx_rings[i]; tx_rings[i].count = new_tx_count; err = i40e_setup_tx_descriptors(&tx_rings[i]); if (err) { @@ -481,10 +481,10 @@ static int i40e_set_ringparam(struct net_device *netdev, } /* alloc updated Rx resources */ - if (new_rx_count != vsi->rx_rings[0].count) { + if (new_rx_count != vsi->rx_rings[0]->count) { netdev_info(netdev, "Changing Rx descriptor count from %d to %d\n", - vsi->rx_rings[0].count, new_rx_count); + vsi->rx_rings[0]->count, new_rx_count); rx_rings = kcalloc(vsi->alloc_queue_pairs, sizeof(struct i40e_ring), GFP_KERNEL); if (!rx_rings) { @@ -494,7 +494,7 @@ static int i40e_set_ringparam(struct net_device *netdev, for (i = 0; i < vsi->num_queue_pairs; i++) { /* clone ring and setup updated count */ - rx_rings[i] = vsi->rx_rings[i]; + rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_count; err = i40e_setup_rx_descriptors(&rx_rings[i]); if (err) { @@ -517,8 +517,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (tx_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) { - i40e_free_tx_resources(&vsi->tx_rings[i]); - vsi->tx_rings[i] = tx_rings[i]; + i40e_free_tx_resources(vsi->tx_rings[i]); + *vsi->tx_rings[i] = tx_rings[i]; } kfree(tx_rings); tx_rings = NULL; @@ -526,8 +526,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (rx_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) { - i40e_free_rx_resources(&vsi->rx_rings[i]); - vsi->rx_rings[i] = rx_rings[i]; + i40e_free_rx_resources(vsi->rx_rings[i]); + *vsi->rx_rings[i] = rx_rings[i]; } kfree(rx_rings); rx_rings = NULL; @@ -588,10 +588,10 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) { - data[i] = vsi->tx_rings[j].stats.packets; - data[i + 1] = vsi->tx_rings[j].stats.bytes; - data[i + 2] = vsi->rx_rings[j].stats.packets; - data[i + 3] = vsi->rx_rings[j].stats.bytes; + data[i] = vsi->tx_rings[j]->stats.packets; + data[i + 1] = vsi->tx_rings[j]->stats.bytes; + data[i + 2] = vsi->rx_rings[j]->stats.packets; + data[i + 3] = vsi->rx_rings[j]->stats.bytes; } if (vsi == pf->vsi[pf->lan_vsi]) { for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index c74ac585c6395..3cf23f5ffe20a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -376,14 +376,14 @@ void i40e_vsi_reset_stats(struct i40e_vsi *vsi) memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets)); if (vsi->rx_rings) for (i = 0; i < vsi->num_queue_pairs; i++) { - memset(&vsi->rx_rings[i].stats, 0 , - sizeof(vsi->rx_rings[i].stats)); - memset(&vsi->rx_rings[i].rx_stats, 0 , - sizeof(vsi->rx_rings[i].rx_stats)); - memset(&vsi->tx_rings[i].stats, 0 , - sizeof(vsi->tx_rings[i].stats)); - memset(&vsi->tx_rings[i].tx_stats, 0, - sizeof(vsi->tx_rings[i].tx_stats)); + memset(&vsi->rx_rings[i]->stats, 0 , + sizeof(vsi->rx_rings[i]->stats)); + memset(&vsi->rx_rings[i]->rx_stats, 0 , + sizeof(vsi->rx_rings[i]->rx_stats)); + memset(&vsi->tx_rings[i]->stats, 0 , + sizeof(vsi->tx_rings[i]->stats)); + memset(&vsi->tx_rings[i]->tx_stats, 0, + sizeof(vsi->tx_rings[i]->tx_stats)); } vsi->stat_offsets_loaded = false; } @@ -602,7 +602,7 @@ static void i40e_update_link_xoff_rx(struct i40e_pf *pf) continue; for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *ring = &vsi->tx_rings[i]; + struct i40e_ring *ring = vsi->tx_rings[i]; clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state); } } @@ -656,7 +656,7 @@ static void i40e_update_prio_xoff_rx(struct i40e_pf *pf) continue; for (i = 0; i < vsi->num_queue_pairs; i++) { - struct i40e_ring *ring = &vsi->tx_rings[i]; + struct i40e_ring *ring = vsi->tx_rings[i]; tc = ring->dcb_tc; if (xoff[tc]) @@ -711,13 +711,13 @@ void i40e_update_stats(struct i40e_vsi *vsi) for (q = 0; q < vsi->num_queue_pairs; q++) { struct i40e_ring *p; - p = &vsi->rx_rings[q]; + p = vsi->rx_rings[q]; rx_b += p->stats.bytes; rx_p += p->stats.packets; rx_buf += p->rx_stats.alloc_rx_buff_failed; rx_page += p->rx_stats.alloc_rx_page_failed; - p = &vsi->tx_rings[q]; + p = vsi->tx_rings[q]; tx_b += p->stats.bytes; tx_p += p->stats.packets; tx_restart += p->tx_stats.restart_queue; @@ -1992,7 +1992,7 @@ static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi) int i, err = 0; for (i = 0; i < vsi->num_queue_pairs && !err; i++) - err = i40e_setup_tx_descriptors(&vsi->tx_rings[i]); + err = i40e_setup_tx_descriptors(vsi->tx_rings[i]); return err; } @@ -2008,8 +2008,8 @@ static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi) int i; for (i = 0; i < vsi->num_queue_pairs; i++) - if (vsi->tx_rings[i].desc) - i40e_free_tx_resources(&vsi->tx_rings[i]); + if (vsi->tx_rings[i]->desc) + i40e_free_tx_resources(vsi->tx_rings[i]); } /** @@ -2027,7 +2027,7 @@ static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi) int i, err = 0; for (i = 0; i < vsi->num_queue_pairs && !err; i++) - err = i40e_setup_rx_descriptors(&vsi->rx_rings[i]); + err = i40e_setup_rx_descriptors(vsi->rx_rings[i]); return err; } @@ -2042,8 +2042,8 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi) int i; for (i = 0; i < vsi->num_queue_pairs; i++) - if (vsi->rx_rings[i].desc) - i40e_free_rx_resources(&vsi->rx_rings[i]); + if (vsi->rx_rings[i]->desc) + i40e_free_rx_resources(vsi->rx_rings[i]); } /** @@ -2227,8 +2227,8 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi) int err = 0; u16 i; - for (i = 0; (i < vsi->num_queue_pairs) && (!err); i++) - err = i40e_configure_tx_ring(&vsi->tx_rings[i]); + for (i = 0; (i < vsi->num_queue_pairs) && !err; i++) + err = i40e_configure_tx_ring(vsi->tx_rings[i]); return err; } @@ -2278,7 +2278,7 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) /* set up individual rings */ for (i = 0; i < vsi->num_queue_pairs && !err; i++) - err = i40e_configure_rx_ring(&vsi->rx_rings[i]); + err = i40e_configure_rx_ring(vsi->rx_rings[i]); return err; } @@ -2302,8 +2302,8 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) qoffset = vsi->tc_config.tc_info[n].qoffset; qcount = vsi->tc_config.tc_info[n].qcount; for (i = qoffset; i < (qoffset + qcount); i++) { - struct i40e_ring *rx_ring = &vsi->rx_rings[i]; - struct i40e_ring *tx_ring = &vsi->tx_rings[i]; + struct i40e_ring *rx_ring = vsi->rx_rings[i]; + struct i40e_ring *tx_ring = vsi->tx_rings[i]; rx_ring->dcb_tc = n; tx_ring->dcb_tc = n; } @@ -2615,8 +2615,8 @@ static void i40e_vsi_disable_irq(struct i40e_vsi *vsi) int i; for (i = 0; i < vsi->num_queue_pairs; i++) { - wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i].reg_idx), 0); - wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i].reg_idx), 0); + wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i]->reg_idx), 0); + wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i]->reg_idx), 0); } if (pf->flags & I40E_FLAG_MSIX_ENABLED) { @@ -2786,8 +2786,8 @@ static irqreturn_t i40e_intr(int irq, void *data) static void map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct i40e_ring *tx_ring = &(vsi->tx_rings[qp_idx]); - struct i40e_ring *rx_ring = &(vsi->rx_rings[qp_idx]); + struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx]; + struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx]; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -3792,8 +3792,8 @@ void i40e_down(struct i40e_vsi *vsi) i40e_napi_disable_all(vsi); for (i = 0; i < vsi->num_queue_pairs; i++) { - i40e_clean_tx_ring(&vsi->tx_rings[i]); - i40e_clean_rx_ring(&vsi->rx_rings[i]); + i40e_clean_tx_ring(vsi->tx_rings[i]); + i40e_clean_rx_ring(vsi->rx_rings[i]); } } @@ -4220,9 +4220,9 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf) continue; for (i = 0; i < vsi->num_queue_pairs; i++) { - set_check_for_tx_hang(&vsi->tx_rings[i]); + set_check_for_tx_hang(vsi->tx_rings[i]); if (test_bit(__I40E_HANG_CHECK_ARMED, - &vsi->tx_rings[i].state)) + &vsi->tx_rings[i]->state)) armed++; } @@ -4959,6 +4959,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) int ret = -ENODEV; struct i40e_vsi *vsi; int sz_vectors; + int sz_rings; int vsi_idx; int i; @@ -5004,7 +5005,18 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) vsi->work_limit = I40E_DEFAULT_IRQ_WORK; INIT_LIST_HEAD(&vsi->mac_filter_list); - i40e_set_num_rings_in_vsi(vsi); + ret = i40e_set_num_rings_in_vsi(vsi); + if (ret) + goto err_rings; + + /* allocate memory for ring pointers */ + sz_rings = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs * 2; + vsi->tx_rings = kzalloc(sz_rings, GFP_KERNEL); + if (!vsi->tx_rings) { + ret = -ENOMEM; + goto err_rings; + } + vsi->rx_rings = &vsi->tx_rings[vsi->alloc_queue_pairs]; /* allocate memory for q_vector pointers */ sz_vectors = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors; @@ -5022,6 +5034,8 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type) goto unlock_pf; err_vectors: + kfree(vsi->tx_rings); +err_rings: pf->next_vsi = i - 1; kfree(vsi); unlock_pf: @@ -5067,6 +5081,7 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) /* free the ring and vector containers */ kfree(vsi->q_vectors); + kfree(vsi->tx_rings); pf->vsi[vsi->idx] = NULL; if (vsi->idx < pf->next_vsi) @@ -5080,6 +5095,23 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) return 0; } +/** + * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI + * @vsi: the VSI being cleaned + **/ +static s32 i40e_vsi_clear_rings(struct i40e_vsi *vsi) +{ + int i; + + for (i = 0; i < vsi->alloc_queue_pairs; i++) { + kfree_rcu(vsi->tx_rings[i], rcu); + vsi->tx_rings[i] = NULL; + vsi->rx_rings[i] = NULL; + } + + return 0; +} + /** * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI * @vsi: the VSI being configured @@ -5087,28 +5119,16 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi) static int i40e_alloc_rings(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int ret = 0; int i; - vsi->rx_rings = kcalloc(vsi->alloc_queue_pairs, - sizeof(struct i40e_ring), GFP_KERNEL); - if (!vsi->rx_rings) { - ret = -ENOMEM; - goto err_alloc_rings; - } - - vsi->tx_rings = kcalloc(vsi->alloc_queue_pairs, - sizeof(struct i40e_ring), GFP_KERNEL); - if (!vsi->tx_rings) { - ret = -ENOMEM; - kfree(vsi->rx_rings); - goto err_alloc_rings; - } - /* Set basic values in the rings to be used later during open() */ for (i = 0; i < vsi->alloc_queue_pairs; i++) { - struct i40e_ring *rx_ring = &vsi->rx_rings[i]; - struct i40e_ring *tx_ring = &vsi->tx_rings[i]; + struct i40e_ring *tx_ring; + struct i40e_ring *rx_ring; + + tx_ring = kzalloc(sizeof(struct i40e_ring) * 2, GFP_KERNEL); + if (!tx_ring) + goto err_out; tx_ring->queue_index = i; tx_ring->reg_idx = vsi->base_queue + i; @@ -5119,7 +5139,9 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) tx_ring->count = vsi->num_desc; tx_ring->size = 0; tx_ring->dcb_tc = 0; + vsi->tx_rings[i] = tx_ring; + rx_ring = &tx_ring[1]; rx_ring->queue_index = i; rx_ring->reg_idx = vsi->base_queue + i; rx_ring->ring_active = false; @@ -5133,24 +5155,14 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) set_ring_16byte_desc_enabled(rx_ring); else clear_ring_16byte_desc_enabled(rx_ring); - } - -err_alloc_rings: - return ret; -} - -/** - * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI - * @vsi: the VSI being cleaned - **/ -static int i40e_vsi_clear_rings(struct i40e_vsi *vsi) -{ - if (vsi) { - kfree(vsi->rx_rings); - kfree(vsi->tx_rings); + vsi->rx_rings[i] = rx_ring; } return 0; + +err_out: + i40e_vsi_clear_rings(vsi); + return -ENOMEM; } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index f153f3770346d..9eee551aa49e1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -64,7 +64,7 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data, if (!vsi) return -ENOENT; - tx_ring = &vsi->tx_rings[0]; + tx_ring = vsi->tx_rings[0]; dev = tx_ring->dev; dma = dma_map_single(dev, fdir_data->raw_packet, @@ -1823,7 +1823,7 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; - struct i40e_ring *tx_ring = &vsi->tx_rings[skb->queue_mapping]; + struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping]; /* hardware can't handle really short frames, hardware padding works * beyond this point diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index c2a6746a5ec90..5db36c38573e1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -228,6 +228,8 @@ struct i40e_ring { struct i40e_vsi *vsi; /* Backreference to associated VSI */ struct i40e_q_vector *q_vector; /* Backreference to associated vector */ + + struct rcu_head rcu; /* to avoid race on free */ } ____cacheline_internodealigned_in_smp; enum i40e_latency_range { From 980e9b1186424fa3eb766d59fc91003d0ed1ed6a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Sat, 28 Sep 2013 06:01:03 +0000 Subject: [PATCH 12/13] i40e: Add support for 64 bit netstats This change brings support for 64 bit netstats to the driver. Previously the stats were 64 bit but highly racy due to the fact that 64 bit transactions are not atomic on 32 bit systems. This change makes is so that the 64 bit byte and packet stats are reliable on all architectures. Signed-off-by: Alexander Duyck Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/i40e/i40e_ethtool.c | 27 ++++++- drivers/net/ethernet/intel/i40e/i40e_main.c | 78 ++++++++++++++++--- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 + drivers/net/ethernet/intel/i40e/i40e_txrx.h | 1 + 4 files changed, 95 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 50153ea2d63c6..1b86138fa9e19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -579,6 +579,7 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, char *p; int j; struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi); + unsigned int start; i40e_update_stats(vsi); @@ -587,12 +588,30 @@ static void i40e_get_ethtool_stats(struct net_device *netdev, data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p; } + rcu_read_lock(); for (j = 0; j < vsi->num_queue_pairs; j++, i += 4) { - data[i] = vsi->tx_rings[j]->stats.packets; - data[i + 1] = vsi->tx_rings[j]->stats.bytes; - data[i + 2] = vsi->rx_rings[j]->stats.packets; - data[i + 3] = vsi->rx_rings[j]->stats.bytes; + struct i40e_ring *tx_ring = ACCESS_ONCE(vsi->tx_rings[j]); + struct i40e_ring *rx_ring; + + if (!tx_ring) + continue; + + /* process Tx ring statistics */ + do { + start = u64_stats_fetch_begin_bh(&tx_ring->syncp); + data[i] = tx_ring->stats.packets; + data[i + 1] = tx_ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start)); + + /* Rx ring is the 2nd half of the queue pair */ + rx_ring = &tx_ring[1]; + do { + start = u64_stats_fetch_begin_bh(&rx_ring->syncp); + data[i + 2] = rx_ring->stats.packets; + data[i + 3] = rx_ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start)); } + rcu_read_unlock(); if (vsi == pf->vsi[pf->lan_vsi]) { for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) { p = (char *)pf + i40e_gstrings_stats[j].stat_offset; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 3cf23f5ffe20a..24ee5d46b758f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -347,14 +347,53 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi) **/ static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct( struct net_device *netdev, - struct rtnl_link_stats64 *storage) + struct rtnl_link_stats64 *stats) { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + struct rtnl_link_stats64 *vsi_stats = i40e_get_vsi_stats_struct(vsi); + int i; + + rcu_read_lock(); + for (i = 0; i < vsi->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u64 bytes, packets; + unsigned int start; + + tx_ring = ACCESS_ONCE(vsi->tx_rings[i]); + if (!tx_ring) + continue; - *storage = *i40e_get_vsi_stats_struct(vsi); + do { + start = u64_stats_fetch_begin_bh(&tx_ring->syncp); + packets = tx_ring->stats.packets; + bytes = tx_ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&tx_ring->syncp, start)); + + stats->tx_packets += packets; + stats->tx_bytes += bytes; + rx_ring = &tx_ring[1]; + + do { + start = u64_stats_fetch_begin_bh(&rx_ring->syncp); + packets = rx_ring->stats.packets; + bytes = rx_ring->stats.bytes; + } while (u64_stats_fetch_retry_bh(&rx_ring->syncp, start)); - return storage; + stats->rx_packets += packets; + stats->rx_bytes += bytes; + } + rcu_read_unlock(); + + /* following stats updated by ixgbe_watchdog_task() */ + stats->multicast = vsi_stats->multicast; + stats->tx_errors = vsi_stats->tx_errors; + stats->tx_dropped = vsi_stats->tx_dropped; + stats->rx_errors = vsi_stats->rx_errors; + stats->rx_crc_errors = vsi_stats->rx_crc_errors; + stats->rx_length_errors = vsi_stats->rx_length_errors; + + return stats; } /** @@ -708,21 +747,38 @@ void i40e_update_stats(struct i40e_vsi *vsi) tx_restart = tx_busy = 0; rx_page = 0; rx_buf = 0; + rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { struct i40e_ring *p; + u64 bytes, packets; + unsigned int start; - p = vsi->rx_rings[q]; - rx_b += p->stats.bytes; - rx_p += p->stats.packets; - rx_buf += p->rx_stats.alloc_rx_buff_failed; - rx_page += p->rx_stats.alloc_rx_page_failed; + /* locate Tx ring */ + p = ACCESS_ONCE(vsi->tx_rings[q]); - p = vsi->tx_rings[q]; - tx_b += p->stats.bytes; - tx_p += p->stats.packets; + do { + start = u64_stats_fetch_begin_bh(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; + } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + tx_b += bytes; + tx_p += packets; tx_restart += p->tx_stats.restart_queue; tx_busy += p->tx_stats.tx_busy; + + /* Rx queue is part of the same block as Tx queue */ + p = &p[1]; + do { + start = u64_stats_fetch_begin_bh(&p->syncp); + packets = p->stats.packets; + bytes = p->stats.bytes; + } while (u64_stats_fetch_retry_bh(&p->syncp, start)); + rx_b += bytes; + rx_p += packets; + rx_buf += p->rx_stats.alloc_rx_buff_failed; + rx_page += p->rx_stats.alloc_rx_page_failed; } + rcu_read_unlock(); vsi->tx_restart = tx_restart; vsi->tx_busy = tx_busy; vsi->rx_page_failed = rx_page; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 9eee551aa49e1..dc89e72fd0f48 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -411,8 +411,10 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget) i += tx_ring->count; tx_ring->next_to_clean = i; + u64_stats_update_begin(&tx_ring->syncp); tx_ring->stats.bytes += total_bytes; tx_ring->stats.packets += total_packets; + u64_stats_update_end(&tx_ring->syncp); tx_ring->q_vector->tx.total_bytes += total_bytes; tx_ring->q_vector->tx.total_packets += total_packets; @@ -1075,8 +1077,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) } rx_ring->next_to_clean = i; + u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); rx_ring->q_vector->rx.total_packets += total_rx_packets; rx_ring->q_vector->rx.total_bytes += total_rx_bytes; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 5db36c38573e1..db55d9947f151 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -218,6 +218,7 @@ struct i40e_ring { /* stats structs */ struct i40e_queue_stats stats; + struct u64_stats_sync syncp; union { struct i40e_tx_queue_stats tx_stats; struct i40e_rx_queue_stats rx_stats; From d04795d6630d85be7359eb06695f8365d53b2c60 Mon Sep 17 00:00:00 2001 From: Catherine Sullivan Date: Sat, 28 Sep 2013 06:00:07 +0000 Subject: [PATCH 13/13] i40e: Bump version Update the version number of the driver. Signed-off-by: Catherine Sullivan Signed-off-by: Jesse Brandeburg Tested-by: Kavindya Deegala Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 24ee5d46b758f..fbe7fe2914a9f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -36,7 +36,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 0 #define DRV_VERSION_MINOR 3 -#define DRV_VERSION_BUILD 9 +#define DRV_VERSION_BUILD 10 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN