io_uring: avoid whole io_wq_work copy for requests completed inline
If requests can be submitted and completed inline, we don't need to
initialize the whole io_wq_work in io_init_req(), which is an expensive
operation. Add a new flag 'REQ_F_WORK_INITIALIZED' to indicate whether
io_wq_work has been initialized, and add a helper io_req_init_async();
callers must invoke io_req_init_async() before they touch any members
of io_wq_work for the first time.
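
For illustration, a minimal user-space sketch of the lazy-initialization
pattern described above (the struct layout and the helper name
req_init_async() are simplified stand-ins; the real helper is
io_req_init_async() in fs/io_uring.c, shown in the diff below):

  /* Sketch only: zero io_wq_work on first touch instead of in io_init_req(). */
  #include <stdio.h>
  #include <string.h>

  #define REQ_F_WORK_INITIALIZED (1U << 0)        /* sketch-local flag bit */

  struct io_wq_work {
          unsigned int flags;
          /* ... other members elided ... */
  };

  struct io_kiocb {
          unsigned int flags;
          struct io_wq_work work;
  };

  /* Zero io_wq_work only the first time any of its members is touched. */
  static inline void req_init_async(struct io_kiocb *req)
  {
          if (req->flags & REQ_F_WORK_INITIALIZED)
                  return;

          memset(&req->work, 0, sizeof(req->work));
          req->flags |= REQ_F_WORK_INITIALIZED;
  }

  int main(void)
  {
          struct io_kiocb inline_req = { 0 };     /* completed inline: never pays the memset */
          struct io_kiocb punted_req = { 0 };

          req_init_async(&punted_req);            /* first touch: one memset */
          req_init_async(&punted_req);            /* later calls: early return, no copy */

          printf("inline work initialized: %d, punted work initialized: %d\n",
                 !!(inline_req.flags & REQ_F_WORK_INITIALIZED),
                 !!(punted_req.flags & REQ_F_WORK_INITIALIZED));
          return 0;
  }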

I used /dev/nullb0 to evaluate the performance improvement on my physical
machine:
  modprobe null_blk nr_devices=1 completion_nsec=0
  sudo taskset -c 60 fio  -name=fiotest -filename=/dev/nullb0 -iodepth=128
  -thread -rw=read -ioengine=io_uring -direct=1 -bs=4k -size=100G -numjobs=1
  -time_based -runtime=120

Before this patch:
Run status group 0 (all jobs):
   READ: bw=724MiB/s (759MB/s), 724MiB/s-724MiB/s (759MB/s-759MB/s),
   io=84.8GiB (91.1GB), run=120001-120001msec

With this patch:
Run status group 0 (all jobs):
   READ: bw=761MiB/s (798MB/s), 761MiB/s-761MiB/s (798MB/s-798MB/s),
   io=89.2GiB (95.8GB), run=120001-120001msec

About 5% improvement.

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Xiaoguang Wang authored and Jens Axboe committed Jun 10, 2020
1 parent c5b8562 commit 7cdaf58
Showing 2 changed files with 36 additions and 9 deletions.
5 changes: 0 additions & 5 deletions fs/io-wq.h
@@ -93,11 +93,6 @@ struct io_wq_work {
pid_t task_pid;
};

#define INIT_IO_WORK(work) \
do { \
*(work) = (struct io_wq_work){}; \
} while (0) \

static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
{
if (!work->list.next)
40 changes: 36 additions & 4 deletions fs/io_uring.c
@@ -542,6 +542,7 @@ enum {
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
REQ_F_QUEUE_TIMEOUT_BIT,
REQ_F_WORK_INITIALIZED_BIT,

/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -599,6 +600,8 @@ enum {
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
/* needs to queue linked timeout */
REQ_F_QUEUE_TIMEOUT = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
/* io_wq_work is initialized */
REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
};

struct async_poll {
@@ -911,6 +914,19 @@ EXPORT_SYMBOL(io_uring_get_socket);

static void io_file_put_work(struct work_struct *work);

/*
* Note: io_req_init_async() must be called the first time you
* touch any members of io_wq_work.
*/
static inline void io_req_init_async(struct io_kiocb *req)
{
if (req->flags & REQ_F_WORK_INITIALIZED)
return;

memset(&req->work, 0, sizeof(req->work));
req->flags |= REQ_F_WORK_INITIALIZED;
}

static inline bool io_async_submit(struct io_ring_ctx *ctx)
{
return ctx->flags & IORING_SETUP_SQPOLL;
@@ -1037,6 +1053,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,

static inline void io_req_work_drop_env(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_WORK_INITIALIZED))
return;

if (req->work.mm) {
mmdrop(req->work.mm);
req->work.mm = NULL;
@@ -2785,8 +2804,14 @@ static int __io_splice_prep(struct io_kiocb *req,
return ret;
req->flags |= REQ_F_NEED_CLEANUP;

if (!S_ISREG(file_inode(sp->file_in)->i_mode))
if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
/*
* Splice operation will be punted async, and here we need to
* modify io_wq_work.flags, so initialize io_wq_work first.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_UNBOUND;
}

return 0;
}
@@ -3372,8 +3397,10 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
/*
* If we queue this for async, it must not be cancellable. That would
* leave the 'file' in an undeterminate state.
* leave the 'file' in an undeterminate state, and here we need to
* modify io_wq_work.flags, so initialize io_wq_work first.
*/
io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_NO_CANCEL;

if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
@@ -4851,6 +4878,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
if (!sqe)
return 0;

io_req_init_async(req);

if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (unlikely(ret))
@@ -5505,7 +5534,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
again:
linked_timeout = io_prep_linked_timeout(req);

if (req->work.creds && req->work.creds != current_cred()) {
if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
req->work.creds != current_cred()) {
if (old_creds)
revert_creds(old_creds);
if (old_creds == req->work.creds)
@@ -5528,6 +5558,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
goto exit;
}
punt:
io_req_init_async(req);

if (io_op_defs[req->opcode].file_table) {
ret = io_grab_files(req);
if (ret)
@@ -5780,7 +5812,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
refcount_set(&req->refs, 2);
req->task = NULL;
req->result = 0;
INIT_IO_WORK(&req->work);

if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
@@ -5802,6 +5833,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,

id = READ_ONCE(sqe->personality);
if (id) {
io_req_init_async(req);
req->work.creds = idr_find(&ctx->personality_idr, id);
if (unlikely(!req->work.creds))
return -EINVAL;