Skip to content

Commit

Permalink
Merge branch 'xdp-grow-tail'
Browse files Browse the repository at this point in the history
Jesper Dangaard Brouer says:

====================
V4:
- Fixup checkpatch.pl issues
- Collected more ACKs

V3:
- Fix issue on virtio_net patch spotted by Jason Wang
- Adjust name for variable in mlx5 patch
- Collected more ACKs

V2:
- Fix bug in mlx5 for XDP_PASS case
- Collected nitpicks and ACKs from mailing list

V1:
- Fix bug in dpaa2

XDP have evolved to support several frame sizes, but xdp_buff was not
updated with this information. This have caused the side-effect that
XDP frame data hard end is unknown. This have limited the BPF-helper
bpf_xdp_adjust_tail to only shrink the packet. This patchset address
this and add packet tail extend/grow.

The purpose of the patchset is ALSO to reserve a memory area that can be
used for storing extra information, specifically for extending XDP with
multi-buffer support. One proposal is to use same layout as
skb_shared_info, which is why this area is currently 320 bytes.

When converting xdp_frame to SKB (veth and cpumap), the full tailroom
area can now be used and SKB truesize is now correct. For most
drivers this result in a much larger tailroom in SKB "head" data
area. The network stack can now take advantage of this when doing SKB
coalescing. Thus, a good driver test is to use xdp_redirect_cpu from
samples/bpf/ and do some TCP stream testing.

Use-cases for tail grow/extend:
(1) IPsec / XFRM needs a tail extend[1][2].
(2) DNS-cache responses in XDP.
(3) HAProxy ALOHA would need it to convert to XDP.
(4) Add tail info e.g. timestamp and collect via tcpdump

[1] http://vger.kernel.org/netconf2019_files/xfrm_xdp.pdf
[2] http://vger.kernel.org/netconf2019.html

Examples on howto access the tail area of an XDP packet is shown in the
XDP-tutorial example[3].

[3] https://github.com/xdp-project/xdp-tutorial/blob/master/experiment01-tailgrow/
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed May 15, 2020
2 parents d00f26b + 7ae2e00 commit 5cc5924
Show file tree
Hide file tree
Showing 43 changed files with 451 additions and 115 deletions.
1 change: 1 addition & 0 deletions drivers/net/ethernet/amazon/ena/ena_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1606,6 +1606,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
"%s qid %d\n", __func__, rx_ring->qid);
res_budget = budget;
xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = ENA_PAGE_SIZE;

do {
xdp_verdict = XDP_PASS;
Expand Down
5 changes: 3 additions & 2 deletions drivers/net/ethernet/amazon/ena/ena_netdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,9 @@
* The buffer size we share with the device is defined to be ENA_PAGE_SIZE
*/

#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
VLAN_HLEN - XDP_PACKET_HEADROOM)
#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
VLAN_HLEN - XDP_PACKET_HEADROOM - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = *data_ptr + *len;
xdp.rxq = &rxr->xdp_rxq;
xdp.frame_sz = PAGE_SIZE; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */
orig_data = xdp.data;

rcu_read_lock();
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/cavium/thunder/nicvf_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
xdp.rxq = &rq->xdp_rxq;
xdp.frame_sz = RCV_FRAG_LEN + XDP_PACKET_HEADROOM;
orig_data = xdp.data;

rcu_read_lock();
Expand Down
7 changes: 7 additions & 0 deletions drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,9 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
xdp_set_data_meta_invalid(&xdp);
xdp.rxq = &ch->xdp_rxq;

xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE -
(dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM);

xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);

/* xdp.data pointer may have changed */
Expand Down Expand Up @@ -366,7 +369,11 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
dma_unmap_page(priv->net_dev->dev.parent, addr,
DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
ch->buf_count--;

/* Allow redirect use of full headroom */
xdp.data_hard_start = vaddr;
xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE;

err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
if (unlikely(err))
ch->stats.xdp_drop++;
Expand Down
30 changes: 25 additions & 5 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1507,6 +1507,22 @@ static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
}

static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;

#if (PAGE_SIZE < 8192)
truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = i40e_rx_offset(rx_ring) ?
SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}

/**
* i40e_alloc_mapped_page - recycle or make a new page
* @rx_ring: ring to use
Expand Down Expand Up @@ -2246,13 +2262,11 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);

#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);

rx_buffer->page_offset += truesize;
#endif
}
Expand Down Expand Up @@ -2335,6 +2349,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
bool failure = false;
struct xdp_buff xdp;

#if (PAGE_SIZE < 8192)
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
#endif
xdp.rxq = &rx_ring->xdp_rxq;

while (likely(total_rx_packets < (unsigned int)budget)) {
Expand Down Expand Up @@ -2389,7 +2406,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;

#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
#endif
skb = i40e_run_xdp(rx_ring, &xdp);
}

Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,12 +531,14 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
struct xdp_buff xdp;

xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);

while (likely(total_rx_packets < (unsigned int)budget)) {
struct i40e_rx_buffer *bi;
Expand Down
34 changes: 25 additions & 9 deletions drivers/net/ethernet/intel/ice/ice_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,22 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
return 0;
}

static unsigned int ice_rx_frame_truesize(struct ice_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;

#if (PAGE_SIZE < 8192)
truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ice_rx_offset(rx_ring) ?
SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}

/**
* ice_run_xdp - Executes an XDP program on initialized xdp_buff
* @rx_ring: Rx ring
Expand Down Expand Up @@ -991,6 +1007,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
bool failure;

xdp.rxq = &rx_ring->xdp_rxq;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0);
#endif

/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
Expand Down Expand Up @@ -1038,6 +1058,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif

rcu_read_lock();
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
Expand All @@ -1051,16 +1075,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
if (!xdp_res)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
unsigned int truesize;

#if (PAGE_SIZE < 8192)
truesize = ice_rx_pg_size(rx_ring) / 2;
#else
truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
size);
#endif
xdp_xmit |= xdp_res;
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
} else {
rx_buf->pagecnt_bias++;
}
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,11 +840,13 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_xmit = 0;
bool failure = false;
struct xdp_buff xdp;

xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);

while (likely(total_rx_packets < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
Expand Down
33 changes: 26 additions & 7 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2244,19 +2244,30 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
return ERR_PTR(-result);
}

static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;

#if (PAGE_SIZE < 8192)
truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}

static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *rx_buffer,
unsigned int size)
{
unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size);
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;

rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
SKB_DATA_ALIGN(size);

rx_buffer->page_offset += truesize;
#endif
}
Expand Down Expand Up @@ -2290,6 +2301,11 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,

xdp.rxq = &rx_ring->xdp_rxq;

/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0);
#endif

while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *rx_buffer;
Expand Down Expand Up @@ -2323,7 +2339,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;

#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}

Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,12 +431,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct ixgbe_adapter *adapter = q_vector->adapter;
u16 cleaned_count = ixgbe_desc_unused(rx_ring);
struct xdp_umem *umem = rx_ring->xsk_umem;
unsigned int xdp_res, xdp_xmit = 0;
bool failure = false;
struct sk_buff *skb;
struct xdp_buff xdp;

xdp.rxq = &rx_ring->xdp_rxq;
xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);

while (likely(total_rx_packets < budget)) {
union ixgbe_adv_rx_desc *rx_desc;
Expand Down
34 changes: 27 additions & 7 deletions drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1095,19 +1095,31 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
return ERR_PTR(-result);
}

static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
unsigned int size)
{
unsigned int truesize;

#if (PAGE_SIZE < 8192)
truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
#else
truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
SKB_DATA_ALIGN(size);
#endif
return truesize;
}

static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size);

#if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = ring_uses_build_skb(rx_ring) ?
SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
SKB_DATA_ALIGN(size);

rx_buffer->page_offset += truesize;
#endif
}
Expand All @@ -1125,6 +1137,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,

xdp.rxq = &rx_ring->xdp_rxq;

/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0);
#endif

while (likely(total_rx_packets < budget)) {
struct ixgbevf_rx_buffer *rx_buffer;
union ixgbe_adv_rx_desc *rx_desc;
Expand Down Expand Up @@ -1157,7 +1174,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
xdp.data_hard_start = xdp.data -
ixgbevf_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;

#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
}

Expand Down
Loading

0 comments on commit 5cc5924

Please sign in to comment.