From f49be6dcd74bfb99b6f9deba4a976f74aad1e844 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 1 Jul 2020 16:54:53 +0800 Subject: [PATCH 1/5] i40e: not compute affinity_mask for IRQ After commit 759dc4a7e605 ("i40e: initialize our affinity_mask based on cpu_possible_mask"), NAPI IRQ affinity_mask is bind to all possible CPUs, not a fixed CPU Signed-off-by: Li RongQing Tested-by: Andrew Bowers Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 05c6d3ea11e65..9cfaa99da4e60 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11186,11 +11186,10 @@ static int i40e_init_msix(struct i40e_pf *pf) * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct - * @cpu: cpu to be used on affinity_mask * * We allocate one q_vector. If allocation fails we return -ENOMEM. **/ -static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) +static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx) { struct i40e_q_vector *q_vector; @@ -11223,7 +11222,7 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu) static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; - int err, v_idx, num_q_vectors, current_cpu; + int err, v_idx, num_q_vectors; /* if not MSIX, give the one vector only to the LAN VSI */ if (pf->flags & I40E_FLAG_MSIX_ENABLED) @@ -11233,15 +11232,10 @@ static int i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi) else return -EINVAL; - current_cpu = cpumask_first(cpu_online_mask); - for (v_idx = 0; v_idx < num_q_vectors; v_idx++) { - err = i40e_vsi_alloc_q_vector(vsi, v_idx, current_cpu); + err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out; - current_cpu = cpumask_next(current_cpu, cpu_online_mask); - if (unlikely(current_cpu >= nr_cpu_ids)) - current_cpu = cpumask_first(cpu_online_mask); } return 0; From 1fa5cef283420b3dad93cd6ab04d7125bc1562de Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 18 Aug 2020 15:07:57 +0800 Subject: [PATCH 2/5] i40e: optimise prefetch page refcount refcount of rx_buffer page will be added here originally, so prefetchw is needed, but after commit 1793668c3b8c ("i40e/i40evf: Update code to better handle incrementing page count"), and refcount is not added every time, so change prefetchw as prefetch. Now it mainly services page_address(), but which accesses struct page only when WANT_PAGE_VIRTUAL or HASHED_PAGE_VIRTUAL is defined otherwise it returns address based on offset, so we prefetch it conditionally. Jakub suggested to define prefetch_page_address in a common header. Reported-by: kernel test robot Suggested-by: Jakub Kicinski Signed-off-by: Li RongQing Reviewed-by: Jesse Brandeburg Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +- include/linux/prefetch.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 91ab824926b9b..8500e1c1a16b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1953,7 +1953,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer; rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); - prefetchw(rx_buffer->page); + prefetch_page_address(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ dma_sync_single_range_for_cpu(rx_ring->dev, diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h index 13eafebf3549a..b83a3f944f287 100644 --- a/include/linux/prefetch.h +++ b/include/linux/prefetch.h @@ -15,6 +15,7 @@ #include #include +struct page; /* prefetch(x) attempts to pre-emptively get the memory pointed to by address "x" into the CPU L1 cache. @@ -62,4 +63,11 @@ static inline void prefetch_range(void *addr, size_t len) #endif } +static inline void prefetch_page_address(struct page *page) +{ +#if defined(WANT_PAGE_VIRTUAL) || defined(HASHED_PAGE_VIRTUAL) + prefetch(page); +#endif +} + #endif From f78bd13027d9d43a6f5f3469377cd6bfd4218a14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 Aug 2020 13:35:54 +0200 Subject: [PATCH 3/5] i40e, xsk: remove HW descriptor prefetch in AF_XDP path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The software prefetching of HW descriptors has a negative impact on the performance. Therefore, it is now removed. Performance for the rx_drop benchmark increased with 2%. Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 13 +++++++++++++ drivers/net/ethernet/intel/i40e/i40e_txrx_common.h | 13 ------------- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 12 ++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 8500e1c1a16b7..b43bc20f701d6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2295,6 +2295,19 @@ void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res) } } +/** + * i40e_inc_ntc: Advance the next_to_clean index + * @rx_ring: Rx ring + **/ +static void i40e_inc_ntc(struct i40e_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + prefetch(I40E_RX_DESC(rx_ring, ntc)); +} + /** * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: rx descriptor ring to transact packets on diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h index 667c4dc4b39f7..1397dd3c1c573 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h @@ -99,19 +99,6 @@ static inline bool i40e_rx_is_programming_status(u64 qword1) return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK; } -/** - * i40e_inc_ntc: Advance the next_to_clean index - * @rx_ring: Rx ring - **/ -static inline void i40e_inc_ntc(struct i40e_ring *rx_ring) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - prefetch(I40E_RX_DESC(rx_ring, ntc)); -} - void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring); void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring); bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi); diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 2a1153d8957ba..cf48758447c2a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -257,6 +257,18 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring, return skb; } +/** + * i40e_inc_ntc: Advance the next_to_clean index + * @rx_ring: Rx ring + **/ +static void i40e_inc_ntc(struct i40e_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; +} + /** * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring * @rx_ring: Rx ring From f0064bfd969bea0d65ef20d191bea8b7494b4ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 Aug 2020 13:35:55 +0200 Subject: [PATCH 4/5] i40e: use 16B HW descriptors instead of 32B MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i40e NIC supports two flavors of HW descriptors, 16 and 32 byte. The latter has, obviously, room for more offloading information. However, the only fields of the 32B HW descriptor that is being used by the driver, is also available in the 16B descriptor. In other words; Reading and writing 32 bytes instead of 16 byte is a waste of bus bandwidth. This commit starts using 16 byte descriptors instead of 32 byte descriptors. For AF_XDP the rx_drop benchmark was improved by 2%. Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_debugfs.c | 10 ++++------ drivers/net/ethernet/intel/i40e/i40e_main.c | 4 ++-- drivers/net/ethernet/intel/i40e/i40e_trace.h | 6 +++--- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 6 +++--- drivers/net/ethernet/intel/i40e/i40e_txrx.h | 2 +- drivers/net/ethernet/intel/i40e/i40e_type.h | 5 ++++- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index a7e212d1caa22..ada0e93c38f04 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -90,7 +90,7 @@ #define I40E_OEM_RELEASE_MASK 0x0000ffff #define I40E_RX_DESC(R, i) \ - (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) + (&(((union i40e_rx_desc *)((R)->desc))[i])) #define I40E_TX_DESC(R, i) \ (&(((struct i40e_tx_desc *)((R)->desc))[i])) #define I40E_TX_CTXTDESC(R, i) \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d3ad2e3aa8383..d7c13ca9be7dd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -604,10 +604,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { rxd = I40E_RX_DESC(ring, i); dev_info(&pf->pdev->dev, - " d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + " d[%03x] = 0x%016llx 0x%016llx\n", i, rxd->read.pkt_addr, - rxd->read.hdr_addr, - rxd->read.rsvd1, rxd->read.rsvd2); + rxd->read.hdr_addr); } } } else if (cnt == 3) { @@ -625,10 +624,9 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, } else { rxd = I40E_RX_DESC(ring, desc_n); dev_info(&pf->pdev->dev, - "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n", + "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n", vsi_seid, ring_id, desc_n, - rxd->read.pkt_addr, rxd->read.hdr_addr, - rxd->read.rsvd1, rxd->read.rsvd2); + rxd->read.pkt_addr, rxd->read.hdr_addr); } } else { dev_info(&pf->pdev->dev, "dump desc rx/tx/xdp []\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9cfaa99da4e60..07207e21874f1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3321,8 +3321,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) rx_ctx.base = (ring->dma / 128); rx_ctx.qlen = ring->count; - /* use 32 byte descriptors */ - rx_ctx.dsize = 1; + /* use 16 byte descriptors */ + rx_ctx.dsize = 0; /* descriptor type is always zero * rx_ctx.dtype = 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_trace.h b/drivers/net/ethernet/intel/i40e/i40e_trace.h index 424f02077e2e2..983f8b98b2756 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_trace.h +++ b/drivers/net/ethernet/intel/i40e/i40e_trace.h @@ -112,7 +112,7 @@ DECLARE_EVENT_CLASS( i40e_rx_template, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb), @@ -140,7 +140,7 @@ DECLARE_EVENT_CLASS( DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); @@ -148,7 +148,7 @@ DEFINE_EVENT( DEFINE_EVENT( i40e_rx_template, i40e_clean_rx_irq_rx, TP_PROTO(struct i40e_ring *ring, - union i40e_32byte_rx_desc *desc, + union i40e_16byte_rx_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b43bc20f701d6..1606ba5318f72 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -533,11 +533,11 @@ static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw, { struct i40e_pf *pf = rx_ring->vsi->back; struct pci_dev *pdev = pf->pdev; - struct i40e_32b_rx_wb_qw0 *qw0; + struct i40e_16b_rx_wb_qw0 *qw0; u32 fcnt_prog, fcnt_avail; u32 error; - qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw; + qw0 = (struct i40e_16b_rx_wb_qw0 *)&qword0_raw; error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >> I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT; @@ -1418,7 +1418,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc); + rx_ring->size = rx_ring->count * sizeof(union i40e_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 703b644fd71f7..66c2b92c0d100 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -110,7 +110,7 @@ enum i40e_dyn_idx_t { */ #define I40E_RX_HDR_SIZE I40E_RXBUFFER_256 #define I40E_PACKET_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) -#define i40e_rx_desc i40e_32byte_rx_desc +#define i40e_rx_desc i40e_16byte_rx_desc #define I40E_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 52410d609ba1d..97d29df65f9e8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -628,7 +628,7 @@ union i40e_16byte_rx_desc { __le64 hdr_addr; /* Header buffer address */ } read; struct { - struct { + struct i40e_16b_rx_wb_qw0 { struct { union { __le16 mirroring_status; @@ -647,6 +647,9 @@ union i40e_16byte_rx_desc { __le64 status_error_len; } qword1; } wb; /* writeback */ + struct { + u64 qword[2]; + } raw; }; union i40e_32byte_rx_desc { From 8cbf74149903382cf422c653d4ee8fb2f44e3ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 25 Aug 2020 13:35:56 +0200 Subject: [PATCH 5/5] i40e, xsk: move buffer allocation out of the Rx processing loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of checking in each iteration of the Rx packet processing loop, move the allocation out of the loop and do it once for each napi activation. For AF_XDP the rx_drop benchmark was improved by 6%. Signed-off-by: Björn Töpel Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index cf48758447c2a..6acede0acdca5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -281,8 +281,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); unsigned int xdp_res, xdp_xmit = 0; - bool failure = false; struct sk_buff *skb; + bool failure; while (likely(total_rx_packets < (unsigned int)budget)) { union i40e_rx_desc *rx_desc; @@ -290,13 +290,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) unsigned int size; u64 qword; - if (cleaned_count >= I40E_RX_BUFFER_WRITE) { - failure = failure || - !i40e_alloc_rx_buffers_zc(rx_ring, - cleaned_count); - cleaned_count = 0; - } - rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean); qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); @@ -371,6 +364,9 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) napi_gro_receive(&rx_ring->q_vector->napi, skb); } + if (cleaned_count >= I40E_RX_BUFFER_WRITE) + failure = !i40e_alloc_rx_buffers_zc(rx_ring, cleaned_count); + i40e_finalize_xdp_rx(rx_ring, xdp_xmit); i40e_update_rx_stats(rx_ring, total_rx_bytes, total_rx_packets);