-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
io_uring/zcrx: add interface queue and refill queue
Add a new object called an interface queue (ifq) that represents a net rx queue that has been configured for zero copy. Each ifq is registered using a new registration opcode IORING_REGISTER_ZCRX_IFQ. The refill queue is allocated by the kernel and mapped by userspace using a new offset IORING_OFF_RQ_RING, in a similar fashion to the main SQ/CQ. It is used by userspace to return buffers that it is done with, which will then be re-used by the netdev again. The main CQ ring is used to notify userspace of received data by using the upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each entry contains the offset + len to the data. For now, each io_uring instance only has a single ifq. Reviewed-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: David Wei <dw@davidwei.uk> Acked-by: Jakub Kicinski <kuba@kernel.org> Link: https://lore.kernel.org/r/20250215000947.789731-2-dw@davidwei.uk Signed-off-by: Jens Axboe <axboe@kernel.dk>
- Loading branch information
David Wei
authored and
Jens Axboe
committed
Feb 17, 2025
1 parent
5c496ff
commit 6f37787
Showing
10 changed files
with
260 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,3 +30,5 @@ source "lib/Kconfig" | |
source "lib/Kconfig.debug" | ||
|
||
source "Documentation/Kconfig" | ||
|
||
source "io_uring/KConfig" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# SPDX-License-Identifier: GPL-2.0-only | ||
# | ||
# io_uring configuration | ||
# | ||
|
||
# Zero-copy receive support for io_uring. The symbol has no prompt, so it
# is switched on automatically whenever all of its dependencies are met.
config IO_URING_ZCRX
	bool
	default y
	depends on PAGE_POOL
	depends on INET
	depends on NET_RX_BUSY_POLL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
#include <linux/kernel.h> | ||
#include <linux/errno.h> | ||
#include <linux/mm.h> | ||
#include <linux/io_uring.h> | ||
|
||
#include <uapi/linux/io_uring.h> | ||
|
||
#include "io_uring.h" | ||
#include "kbuf.h" | ||
#include "memmap.h" | ||
#include "zcrx.h" | ||
|
||
#define IO_RQ_MAX_ENTRIES 32768 | ||
|
||
static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq, | ||
struct io_uring_zcrx_ifq_reg *reg, | ||
struct io_uring_region_desc *rd) | ||
{ | ||
size_t off, size; | ||
void *ptr; | ||
int ret; | ||
|
||
off = sizeof(struct io_uring); | ||
size = off + sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries; | ||
if (size > rd->size) | ||
return -EINVAL; | ||
|
||
ret = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd, | ||
IORING_MAP_OFF_ZCRX_REGION); | ||
if (ret < 0) | ||
return ret; | ||
|
||
ptr = io_region_get_ptr(&ifq->ctx->zcrx_region); | ||
ifq->rq_ring = (struct io_uring *)ptr; | ||
ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off); | ||
return 0; | ||
} | ||
|
||
static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) | ||
{ | ||
io_free_region(ifq->ctx, &ifq->ctx->zcrx_region); | ||
ifq->rq_ring = NULL; | ||
ifq->rqes = NULL; | ||
} | ||
|
||
static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) | ||
{ | ||
struct io_zcrx_ifq *ifq; | ||
|
||
ifq = kzalloc(sizeof(*ifq), GFP_KERNEL); | ||
if (!ifq) | ||
return NULL; | ||
|
||
ifq->if_rxq = -1; | ||
ifq->ctx = ctx; | ||
return ifq; | ||
} | ||
|
||
/* Tear down an interface queue: release its refill ring, then the object. */
static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
{
	/* Must precede kfree(): freeing the ring dereferences ifq->ctx. */
	io_free_rbuf_ring(ifq);
	kfree(ifq);
}
|
||
int io_register_zcrx_ifq(struct io_ring_ctx *ctx, | ||
struct io_uring_zcrx_ifq_reg __user *arg) | ||
{ | ||
struct io_uring_zcrx_ifq_reg reg; | ||
struct io_uring_region_desc rd; | ||
struct io_zcrx_ifq *ifq; | ||
int ret; | ||
|
||
/* | ||
* 1. Interface queue allocation. | ||
* 2. It can observe data destined for sockets of other tasks. | ||
*/ | ||
if (!capable(CAP_NET_ADMIN)) | ||
return -EPERM; | ||
|
||
/* mandatory io_uring features for zc rx */ | ||
if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN && | ||
ctx->flags & IORING_SETUP_CQE32)) | ||
return -EINVAL; | ||
if (ctx->ifq) | ||
return -EBUSY; | ||
if (copy_from_user(®, arg, sizeof(reg))) | ||
return -EFAULT; | ||
if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd))) | ||
return -EFAULT; | ||
if (memchr_inv(®.__resv, 0, sizeof(reg.__resv))) | ||
return -EINVAL; | ||
if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags) | ||
return -EINVAL; | ||
if (reg.rq_entries > IO_RQ_MAX_ENTRIES) { | ||
if (!(ctx->flags & IORING_SETUP_CLAMP)) | ||
return -EINVAL; | ||
reg.rq_entries = IO_RQ_MAX_ENTRIES; | ||
} | ||
reg.rq_entries = roundup_pow_of_two(reg.rq_entries); | ||
|
||
if (!reg.area_ptr) | ||
return -EFAULT; | ||
|
||
ifq = io_zcrx_ifq_alloc(ctx); | ||
if (!ifq) | ||
return -ENOMEM; | ||
|
||
ret = io_allocate_rbuf_ring(ifq, ®, &rd); | ||
if (ret) | ||
goto err; | ||
|
||
ifq->rq_entries = reg.rq_entries; | ||
ifq->if_rxq = reg.if_rxq; | ||
|
||
reg.offsets.rqes = sizeof(struct io_uring); | ||
reg.offsets.head = offsetof(struct io_uring, head); | ||
reg.offsets.tail = offsetof(struct io_uring, tail); | ||
|
||
if (copy_to_user(arg, ®, sizeof(reg)) || | ||
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) { | ||
ret = -EFAULT; | ||
goto err; | ||
} | ||
|
||
ctx->ifq = ifq; | ||
return 0; | ||
err: | ||
io_zcrx_ifq_free(ifq); | ||
return ret; | ||
} | ||
|
||
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) | ||
{ | ||
struct io_zcrx_ifq *ifq = ctx->ifq; | ||
|
||
lockdep_assert_held(&ctx->uring_lock); | ||
|
||
if (!ifq) | ||
return; | ||
|
||
ctx->ifq = NULL; | ||
io_zcrx_ifq_free(ifq); | ||
} | ||
|
||
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) | ||
{ | ||
lockdep_assert_held(&ctx->uring_lock); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
#ifndef IOU_ZC_RX_H | ||
#define IOU_ZC_RX_H | ||
|
||
#include <linux/io_uring_types.h> | ||
|
||
struct io_zcrx_ifq { | ||
struct io_ring_ctx *ctx; | ||
struct io_uring *rq_ring; | ||
struct io_uring_zcrx_rqe *rqes; | ||
u32 rq_entries; | ||
|
||
u32 if_rxq; | ||
}; | ||
|
||
#if defined(CONFIG_IO_URING_ZCRX) | ||
int io_register_zcrx_ifq(struct io_ring_ctx *ctx, | ||
struct io_uring_zcrx_ifq_reg __user *arg); | ||
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); | ||
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx); | ||
#else | ||
static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx, | ||
struct io_uring_zcrx_ifq_reg __user *arg) | ||
{ | ||
return -EOPNOTSUPP; | ||
} | ||
static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) | ||
{ | ||
} | ||
static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) | ||
{ | ||
} | ||
#endif | ||
|
||
#endif |