Skip to content

Commit

Permalink
bpf: add frags support to the bpf_xdp_adjust_tail() API
Browse files Browse the repository at this point in the history
This change adds support for tail growing and shrinking for XDP frags.

When called on a non-linear packet with a grow request, it will work
on the last fragment of the packet. So the maximum grow size is the
last fragments tailroom, i.e. no new buffer will be allocated.
A XDP frags capable driver is expected to set frag_size in xdp_rxq_info
data structure to notify the XDP core the fragment size.
frag_size set to 0 is interpreted by the XDP core as tail growing is
not allowed.
Introduce __xdp_rxq_info_reg utility routine to initialize frag_size field.

When shrinking, it will work from the last fragment, all the way down to
the base buffer depending on the shrinking size. It's important to mention
that once you shrink down the fragment(s) are freed, so you can not grow
again to the original size.

Acked-by: Toke Hoiland-Jorgensen <toke@redhat.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://lore.kernel.org/r/eabda3485dda4f2f158b477729337327e609461d.1642758637.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Eelco Chaudron authored and Alexei Starovoitov committed Jan 21, 2022
1 parent 0165cc8 commit bf25146
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 8 deletions.
3 changes: 2 additions & 1 deletion drivers/net/ethernet/marvell/mvneta.c
Original file line number Diff line number Diff line change
Expand Up @@ -3298,7 +3298,8 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
return err;
}

err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0,
PAGE_SIZE);
if (err < 0)
goto err_free_pp;

Expand Down
16 changes: 14 additions & 2 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ struct xdp_rxq_info {
u32 reg_state;
struct xdp_mem_info mem;
unsigned int napi_id;
u32 frag_size;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */

struct xdp_txq_info {
Expand Down Expand Up @@ -304,6 +305,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
return xdp_frame;
}

void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
struct xdp_buff *xdp);
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
Expand Down Expand Up @@ -340,8 +343,17 @@ static inline void xdp_release_frame(struct xdp_frame *xdpf)
__xdp_release_frame(xdpf->data, mem);
}

int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index, unsigned int napi_id);
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index,
unsigned int napi_id, u32 frag_size);
static inline int
xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index,
unsigned int napi_id)
{
return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0);
}

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
Expand Down
65 changes: 65 additions & 0 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -3830,11 +3830,76 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};

static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
struct xdp_rxq_info *rxq = xdp->rxq;
unsigned int tailroom;

if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
return -EOPNOTSUPP;

tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
if (unlikely(offset > tailroom))
return -EINVAL;

memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
skb_frag_size_add(frag, offset);
sinfo->xdp_frags_size += offset;

return 0;
}

static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
int i, n_frags_free = 0, len_free = 0;

if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
return -EINVAL;

for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
skb_frag_t *frag = &sinfo->frags[i];
int shrink = min_t(int, offset, skb_frag_size(frag));

len_free += shrink;
offset -= shrink;

if (skb_frag_size(frag) == shrink) {
struct page *page = skb_frag_page(frag);

__xdp_return(page_address(page), &xdp->rxq->mem,
false, NULL);
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
break;
}
}
sinfo->nr_frags -= n_frags_free;
sinfo->xdp_frags_size -= len_free;

if (unlikely(!sinfo->nr_frags)) {
xdp_buff_clear_frags_flag(xdp);
xdp->data_end -= offset;
}

return 0;
}

BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
{
void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
void *data_end = xdp->data_end + offset;

if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
if (offset < 0)
return bpf_xdp_frags_shrink_tail(xdp, -offset);

return bpf_xdp_frags_increase_tail(xdp, offset);
}

/* Notice that xdp_data_hard_end have reserved some tailroom */
if (unlikely(data_end > data_hard_end))
return -EINVAL;
Expand Down
12 changes: 7 additions & 5 deletions net/core/xdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,9 @@ static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index, unsigned int napi_id)
int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
struct net_device *dev, u32 queue_index,
unsigned int napi_id, u32 frag_size)
{
if (!dev) {
WARN(1, "Missing net_device from driver");
Expand All @@ -185,11 +186,12 @@ int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
xdp_rxq->dev = dev;
xdp_rxq->queue_index = queue_index;
xdp_rxq->napi_id = napi_id;
xdp_rxq->frag_size = frag_size;

xdp_rxq->reg_state = REG_STATE_REGISTERED;
return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg);

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
Expand Down Expand Up @@ -369,8 +371,8 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
struct xdp_buff *xdp)
void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
struct xdp_buff *xdp)
{
struct xdp_mem_allocator *xa;
struct page *page;
Expand Down

0 comments on commit bf25146

Please sign in to comment.