diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e7e778ca074c0..6f85879ba9933 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -193,42 +193,40 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) { u16 ntu = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; - struct xdp_buff **bi, *xdp; + struct xdp_buff **xdp; + u32 nb_buffs, i; dma_addr_t dma; - bool ok = true; rx_desc = I40E_RX_DESC(rx_ring, ntu); - bi = i40e_rx_bi(rx_ring, ntu); - do { - xdp = xsk_buff_alloc(rx_ring->xsk_pool); - if (!xdp) { - ok = false; - goto no_buffers; - } - *bi = xdp; - dma = xsk_buff_xdp_get_dma(xdp); + xdp = i40e_rx_bi(rx_ring, ntu); + + nb_buffs = min_t(u16, count, rx_ring->count - ntu); + nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); + if (!nb_buffs) + return false; + + i = nb_buffs; + while (i--) { + dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->read.hdr_addr = 0; rx_desc++; - bi++; - ntu++; - - if (unlikely(ntu == rx_ring->count)) { - rx_desc = I40E_RX_DESC(rx_ring, 0); - bi = i40e_rx_bi(rx_ring, 0); - ntu = 0; - } - } while (--count); + xdp++; + } -no_buffers: - if (rx_ring->next_to_use != ntu) { - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.qword1.status_error_len = 0; - i40e_release_rx_desc(rx_ring, ntu); + ntu += nb_buffs; + if (ntu == rx_ring->count) { + rx_desc = I40E_RX_DESC(rx_ring, 0); + xdp = i40e_rx_bi(rx_ring, 0); + ntu = 0; } - return ok; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.qword1.status_error_len = 0; + i40e_release_rx_desc(rx_ring, ntu); + + return count == nb_buffs ? true : false; } /** @@ -365,7 +363,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) break; bi = *i40e_rx_bi(rx_ring, next_to_clean); - bi->data_end = bi->data + size; + xsk_buff_set_size(bi, size); xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool); xdp_res = i40e_run_xdp_zc(rx_ring, bi); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 1e46e80f3d6f8..7c2328529ff8e 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -164,17 +164,10 @@ struct ice_tx_offload_params { }; struct ice_rx_buf { - union { - struct { - dma_addr_t dma; - struct page *page; - unsigned int page_offset; - u16 pagecnt_bias; - }; - struct { - struct xdp_buff *xdp; - }; - }; + dma_addr_t dma; + struct page *page; + unsigned int page_offset; + u16 pagecnt_bias; }; struct ice_q_stats { @@ -270,6 +263,7 @@ struct ice_ring { union { struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; + struct xdp_buff **xdp_buf; }; /* CL2 - 2nd cacheline starts here */ u16 q_index; /* Queue number of ring */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 5a9f61deeb38d..7682eaa9a9ece 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -364,45 +364,39 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; - struct ice_rx_buf *rx_buf; - bool ok = true; + struct xdp_buff **xdp; + u32 nb_buffs, i; dma_addr_t dma; - if (!count) - return true; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - rx_buf = &rx_ring->rx_buf[ntu]; + xdp = &rx_ring->xdp_buf[ntu]; - do { - rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); - if (!rx_buf->xdp) { - ok = false; - break; - } + nb_buffs = min_t(u16, count, rx_ring->count - ntu); + nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); + if (!nb_buffs) + return false; - dma = xsk_buff_xdp_get_dma(rx_buf->xdp); + i = nb_buffs; + while (i--) { + dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); - rx_desc->wb.status_error0 = 0; rx_desc++; - rx_buf++; - ntu++; - - if (unlikely(ntu == rx_ring->count)) { - rx_desc = ICE_RX_DESC(rx_ring, 0); - rx_buf = rx_ring->rx_buf; - ntu = 0; - } - } while (--count); + xdp++; + } - if (rx_ring->next_to_use != ntu) { - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.status_error0 = 0; - ice_release_rx_desc(rx_ring, ntu); + ntu += nb_buffs; + if (ntu == rx_ring->count) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = rx_ring->xdp_buf; + ntu = 0; } - return ok; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.status_error0 = 0; + ice_release_rx_desc(rx_ring, ntu); + + return count == nb_buffs ? true : false; } /** @@ -421,19 +415,19 @@ static void ice_bump_ntc(struct ice_ring *rx_ring) /** * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer * @rx_ring: Rx ring - * @rx_buf: zero-copy Rx buffer + * @xdp_arr: Pointer to the SW ring of xdp_buff pointers * * This function allocates a new skb from a zero-copy Rx buffer. * * Returns the skb on success, NULL on failure. */ static struct sk_buff * -ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +ice_construct_skb_zc(struct ice_ring *rx_ring, struct xdp_buff **xdp_arr) { - unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta; - unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data; - unsigned int datasize_hard = rx_buf->xdp->data_end - - rx_buf->xdp->data_hard_start; + struct xdp_buff *xdp = *xdp_arr; + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; struct sk_buff *skb; skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, @@ -441,13 +435,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) if (unlikely(!skb)) return NULL; - skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); - xsk_buff_free(rx_buf->xdp); - rx_buf->xdp = NULL; + xsk_buff_free(xdp); + *xdp_arr = NULL; return skb; } @@ -521,7 +515,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; - struct ice_rx_buf *rx_buf; + struct xdp_buff **xdp; struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; @@ -544,18 +538,18 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) if (!size) break; - rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; - rx_buf->xdp->data_end = rx_buf->xdp->data + size; - xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool); + xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean]; + xsk_buff_set_size(*xdp, size); + xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp); + xdp_res = ice_run_xdp_zc(rx_ring, *xdp); if (xdp_res) { if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) xdp_xmit |= xdp_res; else - xsk_buff_free(rx_buf->xdp); + xsk_buff_free(*xdp); - rx_buf->xdp = NULL; + *xdp = NULL; total_rx_bytes += size; total_rx_packets++; cleaned_count++; @@ -565,7 +559,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) } /* XDP_PASS path */ - skb = ice_construct_skb_zc(rx_ring, rx_buf); + skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { rx_ring->rx_stats.alloc_buf_failed++; break; @@ -813,12 +807,12 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) u16 i; for (i = 0; i < rx_ring->count; i++) { - struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + struct xdp_buff **xdp = &rx_ring->xdp_buf[i]; - if (!rx_buf->xdp) + if (!xdp) continue; - rx_buf->xdp = NULL; + *xdp = NULL; } } diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4e295541e3967..443d459515648 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -77,6 +77,12 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return xp_alloc(pool); } +/* Returns as many entries as possible up to max. 0 <= N <= max. */ +static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + return xp_alloc_batch(pool, xdp, max); +} + static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return xp_can_alloc(pool, count); @@ -89,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } +static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) +{ + xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; + xdp->data_meta = xdp->data; + xdp->data_end = xdp->data + size; +} + static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { @@ -212,6 +225,11 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return NULL; } +static inline u32 xsk_buff_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + return 0; +} + static inline bool xsk_buff_can_alloc(struct xsk_buff_pool *pool, u32 count) { return false; @@ -221,6 +239,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } +static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) +{ +} + static inline dma_addr_t xsk_buff_raw_get_dma(struct xsk_buff_pool *pool, u64 addr) { diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 7a9a23e7a604a..ddeefc4a10405 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -7,6 +7,7 @@ #include #include #include +#include #include struct xsk_buff_pool; @@ -23,7 +24,6 @@ struct xdp_buff_xsk { dma_addr_t dma; dma_addr_t frame_dma; struct xsk_buff_pool *pool; - bool unaligned; u64 orig_addr; struct list_head free_list_node; }; @@ -67,6 +67,7 @@ struct xsk_buff_pool { u32 free_heads_cnt; u32 headroom; u32 chunk_size; + u32 chunk_shift; u32 frame_len; u8 cached_need_wakeup; bool uses_need_wakeup; @@ -81,6 +82,13 @@ struct xsk_buff_pool { struct xdp_buff_xsk *free_heads[]; }; +/* Masks for xdp_umem_page flags. + * The low 12-bits of the addr will be 0 since this is the page address, so we + * can use them for flags. + */ +#define XSK_NEXT_PG_CONTIG_SHIFT 0 +#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) + /* AF_XDP core. */ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem); @@ -89,7 +97,6 @@ int xp_assign_dev(struct xsk_buff_pool *pool, struct net_device *dev, int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_umem *umem, struct net_device *dev, u16 queue_id); void xp_destroy(struct xsk_buff_pool *pool); -void xp_release(struct xdp_buff_xsk *xskb); void xp_get_pool(struct xsk_buff_pool *pool); bool xp_put_pool(struct xsk_buff_pool *pool); void xp_clear_dev(struct xsk_buff_pool *pool); @@ -99,12 +106,28 @@ void xp_del_xsk(struct xsk_buff_pool *pool, struct xdp_sock *xs); /* AF_XDP, and XDP core. */ void xp_free(struct xdp_buff_xsk *xskb); +static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, + u64 addr) +{ + xskb->orig_addr = addr; + xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; +} + +static inline void xp_init_xskb_dma(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool, + dma_addr_t *dma_pages, u64 addr) +{ + xskb->frame_dma = (dma_pages[addr >> PAGE_SHIFT] & ~XSK_NEXT_PG_CONTIG_MASK) + + (addr & ~PAGE_MASK); + xskb->dma = xskb->frame_dma + pool->headroom + XDP_PACKET_HEADROOM; +} + /* AF_XDP ZC drivers, via xdp_sock_buff.h */ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq); int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, unsigned long attrs, struct page **pages, u32 nr_pages); void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs); struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool); +u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max); bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count); void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr); dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr); @@ -180,4 +203,25 @@ static inline u64 xp_unaligned_add_offset_to_addr(u64 addr) xp_unaligned_extract_offset(addr); } +static inline u32 xp_aligned_extract_idx(struct xsk_buff_pool *pool, u64 addr) +{ + return xp_aligned_extract_addr(pool, addr) >> pool->chunk_shift; +} + +static inline void xp_release(struct xdp_buff_xsk *xskb) +{ + if (xskb->pool->unaligned) + xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; +} + +static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) +{ + u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; + + offset += xskb->pool->headroom; + if (!xskb->pool->unaligned) + return xskb->orig_addr + offset; + return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); +} + #endif /* XSK_BUFF_POOL_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d6b500dc42084..f16074eb53c72 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -134,21 +134,6 @@ int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, return 0; } -void xp_release(struct xdp_buff_xsk *xskb) -{ - xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb; -} - -static u64 xp_get_handle(struct xdp_buff_xsk *xskb) -{ - u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start; - - offset += xskb->pool->headroom; - if (!xskb->pool->unaligned) - return xskb->orig_addr + offset; - return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); -} - static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 8de01aaac4a08..96b14e51ba7ec 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -44,12 +44,13 @@ void xp_destroy(struct xsk_buff_pool *pool) struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, struct xdp_umem *umem) { + bool unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; struct xsk_buff_pool *pool; struct xdp_buff_xsk *xskb; - u32 i; + u32 i, entries; - pool = kvzalloc(struct_size(pool, free_heads, umem->chunks), - GFP_KERNEL); + entries = unaligned ? umem->chunks : 0; + pool = kvzalloc(struct_size(pool, free_heads, entries), GFP_KERNEL); if (!pool) goto out; @@ -63,7 +64,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->free_heads_cnt = umem->chunks; pool->headroom = umem->headroom; pool->chunk_size = umem->chunk_size; - pool->unaligned = umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; + pool->chunk_shift = ffs(umem->chunk_size) - 1; + pool->unaligned = unaligned; pool->frame_len = umem->chunk_size - umem->headroom - XDP_PACKET_HEADROOM; pool->umem = umem; @@ -81,7 +83,10 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, xskb = &pool->heads[i]; xskb->pool = pool; xskb->xdp.frame_sz = umem->chunk_size - umem->headroom; - pool->free_heads[i] = xskb; + if (pool->unaligned) + pool->free_heads[i] = xskb; + else + xp_init_xskb_addr(xskb, pool, i * pool->chunk_size); } return pool; @@ -406,6 +411,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev, if (pool->unaligned) xp_check_dma_contiguity(dma_map); + else + for (i = 0; i < pool->heads_cnt; i++) { + struct xdp_buff_xsk *xskb = &pool->heads[i]; + + xp_init_xskb_dma(xskb, pool, dma_map->dma_pages, xskb->orig_addr); + } err = xp_init_dma_info(pool, dma_map); if (err) { @@ -448,8 +459,6 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) if (pool->free_heads_cnt == 0) return NULL; - xskb = pool->free_heads[--pool->free_heads_cnt]; - for (;;) { if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) { pool->fq->queue_empty_descs++; @@ -466,17 +475,17 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool) } break; } - xskq_cons_release(pool->fq); - xskb->orig_addr = addr; - xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom; - if (pool->dma_pages_cnt) { - xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] & - ~XSK_NEXT_PG_CONTIG_MASK) + - (addr & ~PAGE_MASK); - xskb->dma = xskb->frame_dma + pool->headroom + - XDP_PACKET_HEADROOM; + if (pool->unaligned) { + xskb = pool->free_heads[--pool->free_heads_cnt]; + xp_init_xskb_addr(xskb, pool, addr); + if (pool->dma_pages_cnt) + xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); + } else { + xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; } + + xskq_cons_release(pool->fq); return xskb; } @@ -507,6 +516,96 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool) } EXPORT_SYMBOL(xp_alloc); +static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + u32 i, cached_cons, nb_entries; + + if (max > pool->free_heads_cnt) + max = pool->free_heads_cnt; + max = xskq_cons_nb_entries(pool->fq, max); + + cached_cons = pool->fq->cached_cons; + nb_entries = max; + i = max; + while (i--) { + struct xdp_buff_xsk *xskb; + u64 addr; + bool ok; + + __xskq_cons_read_addr_unchecked(pool->fq, cached_cons++, &addr); + + ok = pool->unaligned ? xp_check_unaligned(pool, &addr) : + xp_check_aligned(pool, &addr); + if (unlikely(!ok)) { + pool->fq->invalid_descs++; + nb_entries--; + continue; + } + + if (pool->unaligned) { + xskb = pool->free_heads[--pool->free_heads_cnt]; + xp_init_xskb_addr(xskb, pool, addr); + if (pool->dma_pages_cnt) + xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr); + } else { + xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)]; + } + + *xdp = &xskb->xdp; + xdp++; + } + + xskq_cons_release_n(pool->fq, max); + return nb_entries; +} + +static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 nb_entries) +{ + struct xdp_buff_xsk *xskb; + u32 i; + + nb_entries = min_t(u32, nb_entries, pool->free_list_cnt); + + i = nb_entries; + while (i--) { + xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node); + list_del(&xskb->free_list_node); + + *xdp = &xskb->xdp; + xdp++; + } + pool->free_list_cnt -= nb_entries; + + return nb_entries; +} + +u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) +{ + u32 nb_entries1 = 0, nb_entries2; + + if (unlikely(pool->dma_need_sync)) { + /* Slow path */ + *xdp = xp_alloc(pool); + return !!*xdp; + } + + if (unlikely(pool->free_list_cnt)) { + nb_entries1 = xp_alloc_reused(pool, xdp, max); + if (nb_entries1 == max) + return nb_entries1; + + max -= nb_entries1; + xdp += nb_entries1; + } + + nb_entries2 = xp_alloc_new_from_fq(pool, xdp, max); + if (!nb_entries2) + pool->fq->queue_empty_descs++; + + return nb_entries1 + nb_entries2; +} +EXPORT_SYMBOL(xp_alloc_batch); + bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count) { if (pool->free_list_cnt >= count) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 9ae13cccfb28d..e9aa2c2363561 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -111,14 +111,18 @@ struct xsk_queue { /* Functions that read and validate content from consumer rings. */ -static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) +static inline void __xskq_cons_read_addr_unchecked(struct xsk_queue *q, u32 cached_cons, u64 *addr) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + u32 idx = cached_cons & q->ring_mask; - if (q->cached_cons != q->cached_prod) { - u32 idx = q->cached_cons & q->ring_mask; + *addr = ring->desc[idx]; +} - *addr = ring->desc[idx]; +static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) +{ + if (q->cached_cons != q->cached_prod) { + __xskq_cons_read_addr_unchecked(q, q->cached_cons, addr); return true; } diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 127bcde06c86e..6c7cf8aadc792 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -384,6 +384,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, ifobj->umem = &ifobj->umem_arr[0]; ifobj->xsk = &ifobj->xsk_arr[0]; ifobj->use_poll = false; + ifobj->pacing_on = true; ifobj->pkt_stream = test->pkt_stream_default; if (i == 0) { @@ -445,6 +446,12 @@ static void test_spec_set_name(struct test_spec *test, const char *name) strncpy(test->name, name, MAX_TEST_NAME_SIZE); } +static void pkt_stream_reset(struct pkt_stream *pkt_stream) +{ + if (pkt_stream) + pkt_stream->rx_pkt_nb = 0; +} + static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) { if (pkt_nb >= pkt_stream->nb_pkts) @@ -507,8 +514,7 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb pkt_stream->nb_pkts = nb_pkts; for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + - DEFAULT_OFFSET; + pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size; pkt_stream->pkts[i].len = pkt_len; pkt_stream->pkts[i].payload = i; @@ -536,14 +542,14 @@ static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len) test->ifobj_rx->pkt_stream = pkt_stream; } -static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, u32 offset) +static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) { struct xsk_umem_info *umem = test->ifobj_tx->umem; struct pkt_stream *pkt_stream; u32 i; pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default); - for (i = 0; i < test->pkt_stream_default->nb_pkts; i += 2) { + for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) { pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset; pkt_stream->pkts[i].len = pkt_len; } @@ -635,6 +641,25 @@ static void pkt_dump(void *pkt, u32 len) fprintf(stdout, "---------------------------------------\n"); } +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, + u64 pkt_stream_addr) +{ + u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; + u32 offset = addr % umem->frame_size, expected_offset = 0; + + if (!pkt_stream->use_addr_for_fill) + pkt_stream_addr = 0; + + expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + + if (offset == expected_offset) + return true; + + ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset, + offset); + return false; +} + static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) { void *data = xsk_umem__get_data(buffer, addr); @@ -717,13 +742,15 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * struct pollfd *fds) { struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream); + struct xsk_umem_info *umem = xsk->umem; u32 idx_rx = 0, idx_fq = 0, rcvd, i; + u32 total = 0; int ret; while (pkt) { rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); @@ -731,16 +758,16 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * continue; } - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); - if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { + if (xsk_ring_prod__needs_wakeup(&umem->fq)) { ret = poll(fds, 1, POLL_TMOUT); if (ret < 0) exit_with_error(-ret); } - ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); + ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { @@ -757,15 +784,25 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info * orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - if (!is_pkt_valid(pkt, xsk->umem->buffer, addr, desc->len)) + + if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len)) + return; + if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr)) return; - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; + *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; pkt = pkt_stream_get_next_rx_pkt(pkt_stream); } - xsk_ring_prod__submit(&xsk->umem->fq, rcvd); + xsk_ring_prod__submit(&umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); + + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight -= rcvd; + total += rcvd; + if (pkts_in_flight < umem->num_frames) + pthread_cond_signal(&pacing_cond); + pthread_mutex_unlock(&pacing_mutex); } } @@ -791,10 +828,19 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb) valid_pkts++; } + pthread_mutex_lock(&pacing_mutex); + pkts_in_flight += valid_pkts; + if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) { + kick_tx(xsk); + pthread_cond_wait(&pacing_cond, &pacing_mutex); + } + pthread_mutex_unlock(&pacing_mutex); + xsk_ring_prod__submit(&xsk->tx, i); xsk->outstanding_tx += valid_pkts; - complete_pkts(xsk, BATCH_SIZE); + complete_pkts(xsk, i); + usleep(10); return i; } @@ -813,8 +859,6 @@ static void send_pkts(struct ifobject *ifobject) fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - u32 sent; - if (ifobject->use_poll) { int ret; @@ -826,9 +870,7 @@ static void send_pkts(struct ifobject *ifobject) continue; } - sent = __send_pkts(ifobject, pkt_cnt); - pkt_cnt += sent; - usleep(10); + pkt_cnt += __send_pkts(ifobject, pkt_cnt); } wait_for_tx_completion(ifobject->xsk); @@ -913,18 +955,17 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; u32 ctr = 0; void *bufs; + int ret; bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); if (bufs == MAP_FAILED) exit_with_error(errno); - while (ctr++ < SOCK_RECONF_CTR) { - int ret; - - ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); - if (ret) - exit_with_error(-ret); + ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz); + if (ret) + exit_with_error(-ret); + while (ctr++ < SOCK_RECONF_CTR) { ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i], ifobject, i); if (!ret) @@ -971,13 +1012,18 @@ static void *worker_testapp_validate_tx(void *arg) static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) { - u32 idx = 0, i; + u32 idx = 0, i, buffers_to_fill; int ret; - ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); - if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) + if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) + buffers_to_fill = umem->num_frames; + else + buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); + if (ret != buffers_to_fill) exit_with_error(ENOSPC); - for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) { + for (i = 0; i < buffers_to_fill; i++) { u64 addr; if (pkt_stream->use_addr_for_fill) { @@ -987,12 +1033,12 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream break; addr = pkt->addr; } else { - addr = (i % umem->num_frames) * umem->frame_size + DEFAULT_OFFSET; + addr = i * umem->frame_size; } *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; } - xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); + xsk_ring_prod__submit(&umem->fq, buffers_to_fill); } static void *worker_testapp_validate_rx(void *arg) @@ -1032,6 +1078,8 @@ static void testapp_validate_traffic(struct test_spec *test) exit_with_error(errno); test->current_step++; + pkt_stream_reset(ifobj_rx->pkt_stream); + pkts_in_flight = 0; /*Spawn RX thread */ pthread_create(&t0, NULL, ifobj_rx->func_ptr, test); @@ -1108,6 +1156,13 @@ static void testapp_bpf_res(struct test_spec *test) testapp_validate_traffic(test); } +static void testapp_headroom(struct test_spec *test) +{ + test_spec_set_name(test, "UMEM_HEADROOM"); + test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; + testapp_validate_traffic(test); +} + static void testapp_stats(struct test_spec *test) { int i; @@ -1115,6 +1170,8 @@ static void testapp_stats(struct test_spec *test) for (i = 0; i < STAT_TEST_TYPE_MAX; i++) { test_spec_reset(test); stat_test_type = i; + /* No or few packets will be received so cannot pace packets */ + test->ifobj_tx->pacing_on = false; switch (stat_test_type) { case STAT_TEST_RX_DROPPED: @@ -1181,7 +1238,7 @@ static bool testapp_unaligned(struct test_spec *test) test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, test->ifobj_tx->umem->frame_size - 32); + pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); test->ifobj_rx->pkt_stream->use_addr_for_fill = true; testapp_validate_traffic(test); @@ -1189,6 +1246,15 @@ static bool testapp_unaligned(struct test_spec *test) return true; } +static void testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + testapp_validate_traffic(test); + pkt_stream_restore_default(test); +} + static void testapp_invalid_desc(struct test_spec *test) { struct pkt pkts[] = { @@ -1270,6 +1336,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ test_spec_set_name(test, "RUN_TO_COMPLETION"); testapp_validate_traffic(test); break; + case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: + test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); + testapp_single_pkt(test); + break; case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; @@ -1305,6 +1375,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ if (!testapp_unaligned(test)) return; break; + case TEST_TYPE_HEADROOM: + testapp_headroom(test); + break; default: break; } diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h index 5ac4a5e647445..2f705f44b7483 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xdpxceiver.h @@ -35,13 +35,13 @@ #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 -#define BATCH_SIZE 8 +#define BATCH_SIZE 64 #define POLL_TMOUT 1000 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) #define RX_FULL_RXQSIZE 32 -#define DEFAULT_OFFSET 256 +#define UMEM_HEADROOM_TEST_SIZE 128 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -55,11 +55,13 @@ enum test_mode { enum test_type { TEST_TYPE_RUN_TO_COMPLETION, TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, + TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, TEST_TYPE_POLL, TEST_TYPE_UNALIGNED, TEST_TYPE_ALIGNED_INV_DESC, TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, TEST_TYPE_UNALIGNED_INV_DESC, + TEST_TYPE_HEADROOM, TEST_TYPE_TEARDOWN, TEST_TYPE_BIDI, TEST_TYPE_STATS, @@ -136,6 +138,7 @@ struct ifobject { bool tx_on; bool rx_on; bool use_poll; + bool pacing_on; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -151,5 +154,9 @@ struct test_spec { }; pthread_barrier_t barr; +pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; + +u32 pkts_in_flight; #endif /* XDPXCEIVER_H */