Merge tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe:
 "This is the first of the io_uring pull requests for the 6.15 merge
  window, there will be others once the net tree has gone in. This
  contains:

   - Cleanup and unification of cancelation handling across various
     request types.

   - Improvements for bundles, supporting them both for incrementally
     consumed buffers and for non-multishot requests.

   - Enable toggling whether iowait is used while waiting on io_uring
     events (a minimal usage sketch follows the quoted message below).
     Unfortunately this is still tied to CPU frequency boosting on short
     waits, as the scheduler side has not been very receptive to
     splitting the (useless) iowait stat from the implied cpufreq boost.

   - Add support for kbuf nodes, enabling zero-copy support for the ublk
     block driver (a hypothetical daemon-side sketch follows the
     changed-files summary below).

   - Various cleanups for resource node handling.

   - Series greatly cleaning up the legacy provided (non-ring based)
     buffers. For years, we've been pushing the ring provided buffers as
     the way to go, and that is what people have been using. Reduce the
     complexity and code associated with legacy provided buffers.

   - Series cleaning up the compat handling.

   - Series improving and cleaning up the recvmsg/sendmsg iovec and msg
     handling.

   - Series of cleanups for io-wq.

   - Start adding a bunch of selftests. The liburing repository
     generally carries feature and regression tests for everything, but
     at least for ublk, we'll initially try the route of having them in
     kernel selftests as well. We'll see how this goes; we might decide
     to migrate more tests this way in the future.

   - Various little cleanups and fixes"
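
The iowait toggle above is exposed to userspace through the new
IORING_ENTER_NO_IOWAIT enter flag and the IORING_FEAT_NO_IOWAIT feature
bit added in include/uapi/linux/io_uring.h below. The following is a
minimal sketch against the raw syscall interface, not code from this
merge; it assumes current uapi headers and a ring fd obtained from
io_uring_setup(), and omits error handling.

/* Sketch: wait for one CQE without the wait being accounted as iowait.
 * 'features' is io_uring_params.features as filled in by io_uring_setup(). */
#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <unistd.h>

static long wait_one_cqe_no_iowait(int ring_fd, unsigned int features)
{
	unsigned int flags = IORING_ENTER_GETEVENTS;

	if (features & IORING_FEAT_NO_IOWAIT)	/* kernel supports the toggle */
		flags |= IORING_ENTER_NO_IOWAIT;

	/* to_submit = 0, min_complete = 1: block until one completion */
	return syscall(__NR_io_uring_enter, ring_fd, 0, 1, flags, NULL, 0);
}

Checking the feature bit first matters because kernels that predate this
merge reject unknown io_uring_enter() flags with -EINVAL.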

* tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux: (108 commits)
  selftests: ublk: add stripe target
  selftests: ublk: simplify loop io completion
  selftests: ublk: enable zero copy for null target
  selftests: ublk: prepare for supporting stripe target
  selftests: ublk: move common code into common.c
  selftests: ublk: increase max buffer size to 1MB
  selftests: ublk: add single sqe allocator helper
  selftests: ublk: add generic_01 for verifying sequential IO order
  selftests: ublk: fix starting ublk device
  io_uring: enable toggle of iowait usage when waiting on CQEs
  selftests: ublk: fix write cache implementation
  selftests: ublk: add variable for user to not show test result
  selftests: ublk: don't show `modprobe` failure
  selftests: ublk: add one dependency header
  io_uring/kbuf: enable bundles for incrementally consumed buffers
  Revert "io_uring/rsrc: simplify the bvec iter count calculation"
  selftests: ublk: improve test usability
  selftests: ublk: add stress test for covering IO vs. killing ublk server
  selftests: ublk: add one stress test for covering IO vs. removing device
  selftests: ublk: load/unload ublk_drv when preparing & cleaning up tests
  ...
Linus Torvalds committed Mar 27, 2025
2 parents 1e1ba8d + 0f3ebf2 commit 91928e0
Showing 58 changed files with 3,883 additions and 872 deletions.
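
For the kbuf node / ublk zero-copy item in the pull request: the
ublk_drv.c and ublk_cmd.h changes below add UBLK_U_IO_REGISTER_IO_BUF and
UBLK_U_IO_UNREGISTER_IO_BUF uring_cmd opcodes, which pin the bvec of an
in-flight ublk request into the ring's fixed-buffer table. The
daemon-side sketch below is a hypothetical illustration, not code from
this merge: it assumes liburing, that the buffer-table index is carried
in ublksrv_io_cmd.addr (that is how the driver hunk consumes it), and
that the ublk char-device fd and the ring are already set up.

/* Hypothetical sketch: expose the pages backing ublk request (q_id, tag)
 * as fixed buffer 'buf_index', so later READ_FIXED/WRITE_FIXED SQEs can
 * touch the data without copying. NULL/error checks omitted. */
#include <liburing.h>
#include <linux/types.h>
#include <linux/ublk_cmd.h>
#include <string.h>

static void ublk_sqe_register_buf(struct io_uring *ring, int ublk_ch_fd,
				  __u16 q_id, __u16 tag, unsigned int buf_index)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct ublksrv_io_cmd *uc;

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = ublk_ch_fd;			/* /dev/ublkcN char device */
	sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;

	uc = (struct ublksrv_io_cmd *)sqe->cmd;	/* 16-byte inline payload */
	uc->q_id = q_id;
	uc->tag = tag;
	uc->addr = buf_index;	/* buffer-table index, as the driver reads it */
}

Unregistration is symmetric via UBLK_U_IO_UNREGISTER_IO_BUF, and the
driver drops its request reference through ublk_io_release() once the
buffer node is released.
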
1 change: 1 addition & 0 deletions MAINTAINERS
@@ -24397,6 +24397,7 @@ S: Maintained
F: Documentation/block/ublk.rst
F: drivers/block/ublk_drv.c
F: include/uapi/linux/ublk_cmd.h
F: tools/testing/selftests/ublk/

UBSAN
M: Kees Cook <kees@kernel.org>
56 changes: 49 additions & 7 deletions drivers/block/ublk_drv.c
@@ -51,6 +51,9 @@
/* private ioctl command mirror */
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)

#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)

/* All UBLK_F_* have to be included into UBLK_F_ALL */
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
| UBLK_F_URING_CMD_COMP_IN_TASK \
@@ -196,12 +199,14 @@ struct ublk_params_header {

static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);

static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
struct ublk_queue *ubq, int tag, size_t offset);
static inline unsigned int ublk_req_build_flags(struct request *req);
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
int tag);
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
{
-return ub->dev_info.flags & UBLK_F_USER_COPY;
+return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
}

static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
@@ -581,7 +586,7 @@ static void ublk_apply_params(struct ublk_device *ub)

static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
{
-return ubq->flags & UBLK_F_USER_COPY;
+return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
}

static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
@@ -1747,6 +1752,42 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

static void ublk_io_release(void *priv)
{
struct request *rq = priv;
struct ublk_queue *ubq = rq->mq_hctx->driver_data;

ublk_put_req_ref(ubq, rq);
}

static int ublk_register_io_buf(struct io_uring_cmd *cmd,
struct ublk_queue *ubq, unsigned int tag,
unsigned int index, unsigned int issue_flags)
{
struct ublk_device *ub = cmd->file->private_data;
struct request *req;
int ret;

req = __ublk_check_and_get_req(ub, ubq, tag, 0);
if (!req)
return -EINVAL;

ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
issue_flags);
if (ret) {
ublk_put_req_ref(ubq, req);
return ret;
}

return 0;
}

static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
unsigned int index, unsigned int issue_flags)
{
return io_buffer_unregister_bvec(cmd, index, issue_flags);
}

static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags,
const struct ublksrv_io_cmd *ub_cmd)
@@ -1798,6 +1839,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,

ret = -EINVAL;
switch (_IOC_NR(cmd_op)) {
case UBLK_IO_REGISTER_IO_BUF:
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
case UBLK_IO_UNREGISTER_IO_BUF:
return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
if (ublk_queue_ready(ubq)) {
@@ -2459,7 +2504,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
*/
-if (info.flags & UBLK_F_USER_COPY)
+if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
return -EINVAL;
}

@@ -2527,9 +2572,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
goto out_free_dev_number;
}

-/* We are not ready to support zero copy */
-ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;

ub->dev_info.nr_hw_queues = min_t(unsigned int,
ub->dev_info.nr_hw_queues, nr_cpu_ids);
ublk_align_max_io_size(ub);
@@ -2863,7 +2905,7 @@ static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
{
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
void __user *argp = (void __user *)(unsigned long)header->addr;
-u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
+u64 features = UBLK_F_ALL;

if (header->len != UBLK_FEATURES_LEN || !header->addr)
return -EINVAL;
12 changes: 7 additions & 5 deletions drivers/nvme/host/ioctl.c
@@ -114,7 +114,8 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,

static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-struct io_uring_cmd *ioucmd, unsigned int flags)
+struct io_uring_cmd *ioucmd, unsigned int flags,
+unsigned int iou_issue_flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@@ -146,7 +147,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
goto out;
}
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
-rq_data_dir(req), &iter, ioucmd);
+rq_data_dir(req), &iter, ioucmd,
+iou_issue_flags);
if (ret < 0)
goto out;
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
@@ -198,7 +200,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
-meta_len, NULL, flags);
+meta_len, NULL, flags, 0);
if (ret)
return ret;
}
@@ -514,10 +516,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return PTR_ERR(req);
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;

-if (d.addr && d.data_len) {
+if (d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
-d.metadata_len, ioucmd, vec);
+d.metadata_len, ioucmd, vec, issue_flags);
if (ret)
return ret;
}
17 changes: 14 additions & 3 deletions include/linux/io_uring/cmd.h
@@ -4,6 +4,7 @@

#include <uapi/linux/io_uring.h>
#include <linux/io_uring_types.h>
#include <linux/blk-mq.h>

/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
@@ -39,7 +40,9 @@ static inline void io_uring_cmd_private_sz_check(size_t cmd_sz)

#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
-struct iov_iter *iter, void *ioucmd);
+struct iov_iter *iter,
+struct io_uring_cmd *ioucmd,
+unsigned int issue_flags);

/*
* Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
@@ -66,8 +69,10 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);

#else
-static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
-struct iov_iter *iter, void *ioucmd)
+static inline int
+io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
+struct iov_iter *iter, struct io_uring_cmd *ioucmd,
+unsigned int issue_flags)
{
return -EOPNOTSUPP;
}
@@ -123,4 +128,10 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_uring_cmd *cmd)
return cmd_to_io_kiocb(cmd)->async_data;
}

int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
void (*release)(void *), unsigned int index,
unsigned int issue_flags);
int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
unsigned int issue_flags);

#endif /* _LINUX_IO_URING_CMD_H */
20 changes: 15 additions & 5 deletions include/linux/io_uring_types.h
@@ -292,6 +292,8 @@ struct io_ring_ctx {

struct io_file_table file_table;
struct io_rsrc_data buf_table;
struct io_alloc_cache node_cache;
struct io_alloc_cache imu_cache;

struct io_submit_state submit_state;

@@ -360,7 +362,6 @@

spinlock_t completion_lock;

-struct list_head io_buffers_comp;
struct list_head cq_overflow_list;

struct hlist_head waitid_list;
@@ -379,8 +380,6 @@
unsigned int file_alloc_start;
unsigned int file_alloc_end;

-struct list_head io_buffers_cache;

/* Keep this last, we don't need it for the fast path */
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
@@ -439,8 +438,15 @@
struct io_mapped_region param_region;
};

/*
* Token indicating function is called in task work context:
* ctx->uring_lock is held and any completions generated will be flushed.
* ONLY core io_uring.c should instantiate this struct.
*/
struct io_tw_state {
};
/* Alias to use in code that doesn't instantiate struct io_tw_state */
typedef struct io_tw_state io_tw_token_t;

enum {
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
@@ -566,7 +572,7 @@
REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
};

-typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);

struct io_task_work {
struct llist_node node;
@@ -601,7 +607,11 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz)
io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \
((cmd_type *)&(req)->cmd) \
)
-#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr)

+static inline struct io_kiocb *cmd_to_io_kiocb(void *ptr)
+{
+return ptr;
+}

struct io_kiocb {
union {
2 changes: 2 additions & 0 deletions include/uapi/linux/io_uring.h
@@ -541,6 +541,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_REGISTERED_RING (1U << 4)
#define IORING_ENTER_ABS_TIMER (1U << 5)
#define IORING_ENTER_EXT_ARG_REG (1U << 6)
#define IORING_ENTER_NO_IOWAIT (1U << 7)

/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -578,6 +579,7 @@ struct io_uring_params {
#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
#define IORING_FEAT_RW_ATTR (1U << 16)
#define IORING_FEAT_NO_IOWAIT (1U << 17)

/*
* io_uring_register(2) opcodes and arguments
4 changes: 4 additions & 0 deletions include/uapi/linux/ublk_cmd.h
@@ -94,6 +94,10 @@
_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
#define UBLK_U_IO_NEED_GET_DATA \
_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
#define UBLK_U_IO_REGISTER_IO_BUF \
_IOWR('u', 0x23, struct ublksrv_io_cmd)
#define UBLK_U_IO_UNREGISTER_IO_BUF \
_IOWR('u', 0x24, struct ublksrv_io_cmd)

/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK 0
6 changes: 6 additions & 0 deletions io_uring/alloc_cache.h
@@ -68,4 +68,10 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
return io_cache_alloc_new(cache, gfp);
}

static inline void io_cache_free(struct io_alloc_cache *cache, void *obj)
{
if (!io_alloc_cache_put(cache, obj))
kfree(obj);
}

#endif
42 changes: 42 additions & 0 deletions io_uring/cancel.c
@@ -341,3 +341,45 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
fput(file);
return ret;
}

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
struct hlist_head *list, bool cancel_all,
bool (*cancel)(struct io_kiocb *))
{
struct hlist_node *tmp;
struct io_kiocb *req;
bool found = false;

lockdep_assert_held(&ctx->uring_lock);

hlist_for_each_entry_safe(req, tmp, list, hash_node) {
if (!io_match_task_safe(req, tctx, cancel_all))
continue;
hlist_del_init(&req->hash_node);
if (cancel(req))
found = true;
}

return found;
}

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
unsigned int issue_flags, struct hlist_head *list,
bool (*cancel)(struct io_kiocb *))
{
struct hlist_node *tmp;
struct io_kiocb *req;
int nr = 0;

io_ring_submit_lock(ctx, issue_flags);
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
if (!io_cancel_req_match(req, cd))
continue;
if (cancel(req))
nr++;
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
break;
}
io_ring_submit_unlock(ctx, issue_flags);
return nr ?: -ENOENT;
}
8 changes: 8 additions & 0 deletions io_uring/cancel.h
@@ -24,6 +24,14 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);

bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
struct hlist_head *list, bool cancel_all,
bool (*cancel)(struct io_kiocb *));

int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
unsigned int issue_flags, struct hlist_head *list,
bool (*cancel)(struct io_kiocb *));

static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
{
if (req->cancel_seq_set && sequence == req->work.cancel_seq)
2 changes: 1 addition & 1 deletion io_uring/filetable.c
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (slot_index >= ctx->file_table.data.nr)
return -EINVAL;

-node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node)
return -ENOMEM;
