From 484beac2ffc1d1dde5fde601deb28f3a82ac250e Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:20 +0800 Subject: [PATCH 01/10] virtio-net: disable the hole mechanism for xdp XDP core assumes that the frame_size of xdp_buff and the length of the frag are PAGE_SIZE. The hole may cause the processing of xdp to fail, so we disable the hole mechanism when xdp is set. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7723b2a49d8ec..a3271d2c7c022 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1426,8 +1426,11 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, /* To avoid internal fragmentation, if there is very likely not * enough space for another buffer, add the remaining space to * the current buffer. + * XDP core assumes that frame_size of xdp_buff and the length + * of the frag are PAGE_SIZE, so we disable the hole mechanism. */ - len += hole; + if (!headroom) + len += hole; alloc_frag->offset += hole; } From e814b958ad8857d6f7354d3a189776eb604d86dd Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:21 +0800 Subject: [PATCH 02/10] virtio-net: fix calculation of MTU for single-buffer xdp When single-buffer xdp is loaded, the size of the buffer filled each time is 'sz = (PAGE_SIZE - headroom - tailroom)', which is the maximum packet length that the driver allows the device to pass in. Otherwise, the packet with a length greater than sz will come in, so num_buf will be greater than or equal to 2, and xdp_linearize_page() will be performed and the packet will be dropped because the total length is greater than PAGE_SIZE. So the maximum value of MTU for single-buffer xdp is 'max_sz = sz - ETH_HLEN'. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a3271d2c7c022..81d92978cc92f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3081,7 +3081,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi) static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, struct netlink_ext_ack *extack) { - unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr); + unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + + sizeof(struct skb_shared_info)); + unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN; struct virtnet_info *vi = netdev_priv(dev); struct bpf_prog *old_prog; u16 xdp_qp = 0, curr_qp; @@ -3106,7 +3108,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (dev->mtu > max_sz) { NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); - netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz); + netdev_warn(dev, "XDP requires MTU less than %u\n", max_sz); return -EINVAL; } From 8d9bc36de5fc7b64bbce7b479dbe0ffdcb31b3b5 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:22 +0800 Subject: [PATCH 03/10] virtio-net: set up xdp for multi buffer packets When the xdp program sets xdp.frags, which means it can process multi-buffer packets over larger MTU, so we continue to support xdp. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 81d92978cc92f..ba4d5e66a4d71 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3106,9 +3106,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return -EINVAL; } - if (dev->mtu > max_sz) { - NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); - netdev_warn(dev, "XDP requires MTU less than %u\n", max_sz); + if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) { + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags"); + netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz); return -EINVAL; } From 50bd14bc98fa0c86ea1e688d93ef1ffe8f1572a0 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:23 +0800 Subject: [PATCH 04/10] virtio-net: update bytes calculation for xdp_frame Update relative record value for xdp_frame as basis for multi-buffer xdp transmission. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba4d5e66a4d71..5e0cdab897db6 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -665,7 +665,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, if (likely(is_xdp_frame(ptr))) { struct xdp_frame *frame = ptr_to_xdp(ptr); - bytes += frame->len; + bytes += xdp_get_frame_len(frame); xdp_return_frame(frame); } else { struct sk_buff *skb = ptr; @@ -1611,7 +1611,7 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) } else { struct xdp_frame *frame = ptr_to_xdp(ptr); - bytes += frame->len; + bytes += xdp_get_frame_len(frame); xdp_return_frame(frame); } packets++; From ef75cb51f13941cf7633227ade43c4d86ecbc336 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:24 +0800 Subject: [PATCH 05/10] virtio-net: build xdp_buff with multi buffers Support xdp for multi buffer packets in mergeable mode. Putting the first buffer as the linear part for xdp_buff, and the rest of the buffers as non-linear fragments to struct skb_shared_info in the tailroom belonging to xdp_buff. Let 'truesize' return to its literal meaning, that is, when xdp is set, it includes the length of headroom and tailroom. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 108 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 8 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5e0cdab897db6..32681881fae3b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -938,6 +938,91 @@ static struct sk_buff *receive_big(struct net_device *dev, return NULL; } +/* TODO: build xdp in big mode */ +static int virtnet_build_xdp_buff_mrg(struct net_device *dev, + struct virtnet_info *vi, + struct receive_queue *rq, + struct xdp_buff *xdp, + void *buf, + unsigned int len, + unsigned int frame_sz, + u16 *num_buf, + unsigned int *xdp_frags_truesize, + struct virtnet_rq_stats *stats) +{ + struct virtio_net_hdr_mrg_rxbuf *hdr = buf; + unsigned int headroom, tailroom, room; + unsigned int truesize, cur_frag_size; + struct skb_shared_info *shinfo; + unsigned int xdp_frags_truesz = 0; + struct page *page; + skb_frag_t *frag; + int offset; + void *ctx; + + xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq); + xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM, + VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true); + + if (*num_buf > 1) { + /* If we want to build multi-buffer xdp, we need + * to specify that the flags of xdp_buff have the + * XDP_FLAGS_HAS_FRAG bit. + */ + if (!xdp_buff_has_frags(xdp)) + xdp_buff_set_frags_flag(xdp); + + shinfo = xdp_get_shared_info_from_buff(xdp); + shinfo->nr_frags = 0; + shinfo->xdp_frags_size = 0; + } + + if ((*num_buf - 1) > MAX_SKB_FRAGS) + return -EINVAL; + + while ((--*num_buf) >= 1) { + buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx); + if (unlikely(!buf)) { + pr_debug("%s: rx error: %d buffers out of %d missing\n", + dev->name, *num_buf, + virtio16_to_cpu(vi->vdev, hdr->num_buffers)); + dev->stats.rx_length_errors++; + return -EINVAL; + } + + stats->bytes += len; + page = virt_to_head_page(buf); + offset = buf - page_address(page); + + truesize = mergeable_ctx_to_truesize(ctx); + headroom = mergeable_ctx_to_headroom(ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + + cur_frag_size = truesize; + xdp_frags_truesz += cur_frag_size; + if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) { + put_page(page); + pr_debug("%s: rx error: len %u exceeds truesize %lu\n", + dev->name, len, (unsigned long)(truesize - room)); + dev->stats.rx_length_errors++; + return -EINVAL; + } + + frag = &shinfo->frags[shinfo->nr_frags++]; + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, offset); + skb_frag_size_set(frag, len); + if (page_is_pfmemalloc(page)) + xdp_buff_set_frag_pfmemalloc(xdp); + + shinfo->xdp_frags_size += len; + } + + *xdp_frags_truesize = xdp_frags_truesz; + return 0; +} + static struct sk_buff *receive_mergeable(struct net_device *dev, struct virtnet_info *vi, struct receive_queue *rq, @@ -956,15 +1041,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); unsigned int metasize = 0; + unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); unsigned int frame_sz; int err; head_skb = NULL; stats->bytes += len - vi->hdr_len; - if (unlikely(len > truesize)) { + if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)ctx); + dev->name, len, (unsigned long)(truesize - room)); dev->stats.rx_length_errors++; goto err_skb; } @@ -990,10 +1077,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (unlikely(hdr->hdr.gso_type)) goto err_xdp; - /* Buffers with headroom use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() + /* Now XDP core assumes frag size is PAGE_SIZE, but buffers + * with headroom may add hole in truesize, which + * make their length exceed PAGE_SIZE. So we disabled the + * hole mechanism for xdp. See add_recvbuf_mergeable(). */ - frame_sz = headroom ? PAGE_SIZE : truesize; + frame_sz = truesize; /* This happens when rx buffer size is underestimated * or headroom is not enough because of the buffer @@ -1146,9 +1235,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, page = virt_to_head_page(buf); truesize = mergeable_ctx_to_truesize(ctx); - if (unlikely(len > truesize)) { + headroom = mergeable_ctx_to_headroom(ctx); + tailroom = headroom ? sizeof(struct skb_shared_info) : 0; + room = SKB_DATA_ALIGN(headroom + tailroom); + if (unlikely(len > truesize - room)) { pr_debug("%s: rx error: len %u exceeds truesize %lu\n", - dev->name, len, (unsigned long)ctx); + dev->name, len, (unsigned long)(truesize - room)); dev->stats.rx_length_errors++; goto err_skb; } @@ -1435,7 +1527,7 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi, } sg_init_one(rq->sg, buf, len); - ctx = mergeable_len_to_ctx(len, headroom); + ctx = mergeable_len_to_ctx(len + room, headroom); err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp); if (err < 0) put_page(virt_to_head_page(buf)); From 22174f79a44baf5e46faafff1d7b21363431b93a Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:25 +0800 Subject: [PATCH 06/10] virtio-net: construct multi-buffer xdp in mergeable Build multi-buffer xdp using virtnet_build_xdp_buff_mrg(). For the prefilled buffer before xdp is set, we will probably use vq reset in the future. At the same time, virtio net currently uses comp pages, and bpf_xdp_frags_increase_tail() needs to calculate the tailroom of the last frag, which will involve the offset of the corresponding page and cause a negative value, so we disable tail increase by not setting xdp_rxq->frag_size. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 58 ++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 32681881fae3b..262d215b15182 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1043,7 +1043,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, unsigned int metasize = 0; unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); - unsigned int frame_sz; + unsigned int frame_sz, xdp_room; int err; head_skb = NULL; @@ -1064,11 +1064,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_lock(); xdp_prog = rcu_dereference(rq->xdp_prog); if (xdp_prog) { + unsigned int xdp_frags_truesz = 0; + struct skb_shared_info *shinfo; struct xdp_frame *xdpf; struct page *xdp_page; struct xdp_buff xdp; void *data; u32 act; + int i; /* Transient failure which in theory could occur if * in-flight packets from before XDP was enabled reach @@ -1084,14 +1087,16 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, */ frame_sz = truesize; - /* This happens when rx buffer size is underestimated - * or headroom is not enough because of the buffer - * was refilled before XDP is set. This should only - * happen for the first several packets, so we don't - * care much about its performance. + /* This happens when headroom is not enough because + * of the buffer was prefilled before XDP is set. + * This should only happen for the first several packets. + * In fact, vq reset can be used here to help us clean up + * the prefilled buffers, but many existing devices do not + * support it, and we don't want to bother users who are + * using xdp normally. */ - if (unlikely(num_buf > 1 || - headroom < virtnet_get_headroom(vi))) { + if (!xdp_prog->aux->xdp_has_frags && + (num_buf > 1 || headroom < virtnet_get_headroom(vi))) { /* linearize data for XDP */ xdp_page = xdp_linearize_page(rq, &num_buf, page, offset, @@ -1102,17 +1107,29 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, if (!xdp_page) goto err_xdp; offset = VIRTIO_XDP_HEADROOM; + } else if (unlikely(headroom < virtnet_get_headroom(vi))) { + xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM + + sizeof(struct skb_shared_info)); + if (len + xdp_room > PAGE_SIZE) + goto err_xdp; + + xdp_page = alloc_page(GFP_ATOMIC); + if (!xdp_page) + goto err_xdp; + + memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM, + page_address(page) + offset, len); + frame_sz = PAGE_SIZE; + offset = VIRTIO_XDP_HEADROOM; } else { xdp_page = page; } - /* Allow consuming headroom but reserve enough space to push - * the descriptor on if we get an XDP_TX return code. - */ data = page_address(xdp_page) + offset; - xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq); - xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len, - VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true); + err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz, + &num_buf, &xdp_frags_truesz, stats); + if (unlikely(err)) + goto err_xdp_frags; act = bpf_prog_run_xdp(xdp_prog, &xdp); stats->xdp_packets++; @@ -1208,6 +1225,19 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, __free_pages(xdp_page, 0); goto err_xdp; } +err_xdp_frags: + if (unlikely(xdp_page != page)) + __free_pages(xdp_page, 0); + + if (xdp_buff_has_frags(&xdp)) { + shinfo = xdp_get_shared_info_from_buff(&xdp); + for (i = 0; i < shinfo->nr_frags; i++) { + xdp_page = skb_frag_page(&shinfo->frags[i]); + put_page(xdp_page); + } + } + + goto err_xdp; } rcu_read_unlock(); From 97717e8dbda1dede65c4df12891332502df632f3 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:26 +0800 Subject: [PATCH 07/10] virtio-net: transmit the multi-buffer xdp This serves as the basis for XDP_TX and XDP_REDIRECT to send a multi-buffer xdp_frame. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 262d215b15182..6d3cf8887dd7f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -570,22 +570,43 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; - int err; + struct skb_shared_info *shinfo; + u8 nr_frags = 0; + int err, i; if (unlikely(xdpf->headroom < vi->hdr_len)) return -EOVERFLOW; - /* Make room for virtqueue hdr (also change xdpf->headroom?) */ + if (unlikely(xdp_frame_has_frags(xdpf))) { + shinfo = xdp_get_shared_info_from_frame(xdpf); + nr_frags = shinfo->nr_frags; + } + + /* In wrapping function virtnet_xdp_xmit(), we need to free + * up the pending old buffers, where we need to calculate the + * position of skb_shared_info in xdp_get_frame_len() and + * xdp_return_frame(), which will involve to xdpf->data and + * xdpf->headroom. Therefore, we need to update the value of + * headroom synchronously here. + */ + xdpf->headroom -= vi->hdr_len; xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ hdr = xdpf->data; memset(hdr, 0, vi->hdr_len); xdpf->len += vi->hdr_len; - sg_init_one(sq->sg, xdpf->data, xdpf->len); + sg_init_table(sq->sg, nr_frags + 1); + sg_set_buf(sq->sg, xdpf->data, xdpf->len); + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &shinfo->frags[i]; + + sg_set_page(&sq->sg[i + 1], skb_frag_page(frag), + skb_frag_size(frag), skb_frag_off(frag)); + } - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), - GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1, + xdp_to_ptr(xdpf), GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ From b26aa481b4b710051dd663c89ed31f705a7a67eb Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:27 +0800 Subject: [PATCH 08/10] virtio-net: build skb from multi-buffer xdp This converts the xdp_buff directly to a skb, including multi-buffer and single buffer xdp. We'll isolate the construction of skb based on xdp from page_to_skb(). Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 49 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 6d3cf8887dd7f..5d3aad2ef8d00 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -959,6 +959,55 @@ static struct sk_buff *receive_big(struct net_device *dev, return NULL; } +/* Why not use xdp_build_skb_from_frame() ? + * XDP core assumes that xdp frags are PAGE_SIZE in length, while in + * virtio-net there are 2 points that do not match its requirements: + * 1. The size of the prefilled buffer is not fixed before xdp is set. + * 2. xdp_build_skb_from_frame() does more checks that we don't need, + * like eth_type_trans() (which virtio-net does in receive_buf()). + */ +static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev, + struct virtnet_info *vi, + struct xdp_buff *xdp, + unsigned int xdp_frags_truesz) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + unsigned int headroom, data_len; + struct sk_buff *skb; + int metasize; + u8 nr_frags; + + if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) { + pr_debug("Error building skb as missing reserved tailroom for xdp"); + return NULL; + } + + if (unlikely(xdp_buff_has_frags(xdp))) + nr_frags = sinfo->nr_frags; + + skb = build_skb(xdp->data_hard_start, xdp->frame_sz); + if (unlikely(!skb)) + return NULL; + + headroom = xdp->data - xdp->data_hard_start; + data_len = xdp->data_end - xdp->data; + skb_reserve(skb, headroom); + __skb_put(skb, data_len); + + metasize = xdp->data - xdp->data_meta; + metasize = metasize > 0 ? metasize : 0; + if (metasize) + skb_metadata_set(skb, metasize); + + if (unlikely(xdp_buff_has_frags(xdp))) + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, + xdp_frags_truesz, + xdp_buff_is_frag_pfmemalloc(xdp)); + + return skb; +} + /* TODO: build xdp in big mode */ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, struct virtnet_info *vi, From 18117a842ab029df139f776bf31eebff6d0e0730 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:28 +0800 Subject: [PATCH 09/10] virtio-net: remove xdp related info from page_to_skb() For the clear construction of xdp_buff, we remove the xdp processing interleaved with page_to_skb(). Now, the logic of xdp and building skb from xdp are separate and independent. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 41 +++++++++------------------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5d3aad2ef8d00..c59048668233c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -446,9 +446,7 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx) static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct receive_queue *rq, struct page *page, unsigned int offset, - unsigned int len, unsigned int truesize, - bool hdr_valid, unsigned int metasize, - unsigned int headroom) + unsigned int len, unsigned int truesize) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -466,21 +464,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - /* If headroom is not 0, there is an offset between the beginning of the - * data and the allocated space, otherwise the data and the allocated - * space are aligned. - * - * Buffers with headroom use PAGE_SIZE as alloc size, see - * add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = headroom ? PAGE_SIZE : truesize; - tailroom = truesize - headroom; - buf = p - headroom; - + buf = p; len -= hdr_len; offset += hdr_padded_len; p += hdr_padded_len; - tailroom -= hdr_padded_len + len; + tailroom = truesize - hdr_padded_len - len; shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -510,7 +498,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, if (len <= skb_tailroom(skb)) copy = len; else - copy = ETH_HLEN + metasize; + copy = ETH_HLEN; skb_put_data(skb, p, copy); len -= copy; @@ -549,19 +537,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, give_pages(rq, page); ok: - /* hdr_valid means no XDP, so we can copy the vnet header */ - if (hdr_valid) { - hdr = skb_vnet_hdr(skb); - memcpy(hdr, hdr_p, hdr_len); - } + hdr = skb_vnet_hdr(skb); + memcpy(hdr, hdr_p, hdr_len); if (page_to_free) put_page(page_to_free); - if (metasize) { - __skb_pull(skb, metasize); - skb_metadata_set(skb, metasize); - } - return skb; } @@ -945,7 +925,7 @@ static struct sk_buff *receive_big(struct net_device *dev, { struct page *page = buf; struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0); + page_to_skb(vi, rq, page, 0, len, PAGE_SIZE); stats->bytes += len - vi->hdr_len; if (unlikely(!skb)) @@ -1243,9 +1223,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, - len, PAGE_SIZE, false, - metasize, - headroom); + len, PAGE_SIZE); return head_skb; } break; @@ -1312,8 +1290,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); skip_xdp: - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, - metasize, headroom); + head_skb = page_to_skb(vi, rq, page, offset, len, truesize); curr_skb = head_skb; if (unlikely(!curr_skb)) From fab89bafa95b6f333b0e2dca0ae07a0b3600e954 Mon Sep 17 00:00:00 2001 From: Heng Qi Date: Sat, 14 Jan 2023 16:22:29 +0800 Subject: [PATCH 10/10] virtio-net: support multi-buffer xdp Driver can pass the skb to stack by build_skb_from_xdp_buff(). Driver forwards multi-buffer packets using the send queue when XDP_TX and XDP_REDIRECT, and clears the reference of multi pages when XDP_DROP. Signed-off-by: Heng Qi Reviewed-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 65 +++++++--------------------------------- 1 file changed, 10 insertions(+), 55 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c59048668233c..68804528f1293 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1090,7 +1090,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize = mergeable_ctx_to_truesize(ctx); unsigned int headroom = mergeable_ctx_to_headroom(ctx); - unsigned int metasize = 0; unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0; unsigned int room = SKB_DATA_ALIGN(headroom + tailroom); unsigned int frame_sz, xdp_room; @@ -1186,63 +1185,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, switch (act) { case XDP_PASS: - metasize = xdp.data - xdp.data_meta; - - /* recalculate offset to account for any header - * adjustments and minus the metasize to copy the - * metadata in page_to_skb(). Note other cases do not - * build an skb and avoid using offset - */ - offset = xdp.data - page_address(xdp_page) - - vi->hdr_len - metasize; - - /* recalculate len if xdp.data, xdp.data_end or - * xdp.data_meta were adjusted - */ - len = xdp.data_end - xdp.data + vi->hdr_len + metasize; - - /* recalculate headroom if xdp.data or xdp_data_meta - * were adjusted, note that offset should always point - * to the start of the reserved bytes for virtio_net - * header which are followed by xdp.data, that means - * that offset is equal to the headroom (when buf is - * starting at the beginning of the page, otherwise - * there is a base offset inside the page) but it's used - * with a different starting point (buf start) than - * xdp.data (buf start + vnet hdr size). If xdp.data or - * data_meta were adjusted by the xdp prog then the - * headroom size has changed and so has the offset, we - * can use data_hard_start, which points at buf start + - * vnet hdr size, to calculate the new headroom and use - * it later to compute buf start in page_to_skb() - */ - headroom = xdp.data - xdp.data_hard_start - metasize; - - /* We can only create skb based on xdp_page. */ - if (unlikely(xdp_page != page)) { - rcu_read_unlock(); + if (unlikely(xdp_page != page)) put_page(page); - head_skb = page_to_skb(vi, rq, xdp_page, offset, - len, PAGE_SIZE); - return head_skb; - } - break; + head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz); + rcu_read_unlock(); + return head_skb; case XDP_TX: stats->xdp_tx++; xdpf = xdp_convert_buff_to_frame(&xdp); if (unlikely(!xdpf)) { - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; + netdev_dbg(dev, "convert buff to frame failed for xdp\n"); + goto err_xdp_frags; } err = virtnet_xdp_xmit(dev, 1, &xdpf, 0); if (unlikely(!err)) { xdp_return_frame_rx_napi(xdpf); } else if (unlikely(err < 0)) { trace_xdp_exception(vi->dev, xdp_prog, act); - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; + goto err_xdp_frags; } *xdp_xmit |= VIRTIO_XDP_TX; if (unlikely(xdp_page != page)) @@ -1252,11 +1212,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, case XDP_REDIRECT: stats->xdp_redirects++; err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (err) { - if (unlikely(xdp_page != page)) - put_page(xdp_page); - goto err_xdp; - } + if (err) + goto err_xdp_frags; *xdp_xmit |= VIRTIO_XDP_REDIR; if (unlikely(xdp_page != page)) put_page(page); @@ -1269,9 +1226,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, trace_xdp_exception(vi->dev, xdp_prog, act); fallthrough; case XDP_DROP: - if (unlikely(xdp_page != page)) - __free_pages(xdp_page, 0); - goto err_xdp; + goto err_xdp_frags; } err_xdp_frags: if (unlikely(xdp_page != page))