Skip to content

Commit

Permalink
Merge tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux…
Browse files Browse the repository at this point in the history
…-block

Pull io_uring updates from Jens Axboe:
 "Highlights from this cycles are things like request recycling and
  task_work optimizations, which net us anywhere from 10-20% of speedups
  on workloads that mostly are inline.

  This work was originally done to put io_uring under memcg, which adds
  considerable overhead. But it's a really nice win as well. Also worth
  highlighting is the LOOKUP_CACHED work in the VFS, and using it in
  io_uring. Greatly speeds up the fast path for file opens.

  Summary:

   - Put io_uring under memcg protection. We accounted just the rings
     themselves under rlimit memlock before, now we account everything.

   - Request cache recycling, persistent across invocations (Pavel, me)

   - First part of a cleanup/improvement to buffer registration (Bijan)

   - SQPOLL fixes (Hao)

   - File registration NULL pointer fixup (Dan)

   - LOOKUP_CACHED support for io_uring

   - Disable /proc/thread-self/ for io_uring, like we do for /proc/self

   - Add Pavel to the io_uring MAINTAINERS entry

   - Tons of code cleanups and optimizations (Pavel)

   - Support for skip entries in file registration (Noah)"

* tag 'for-5.12/io_uring-2021-02-17' of git://git.kernel.dk/linux-block: (103 commits)
  io_uring: tctx->task_lock should be IRQ safe
  proc: don't allow async path resolution of /proc/thread-self components
  io_uring: kill cached requests from exiting task closing the ring
  io_uring: add helper to free all request caches
  io_uring: allow task match to be passed to io_req_cache_free()
  io-wq: clear out worker ->fs and ->files
  io_uring: optimise io_init_req() flags setting
  io_uring: clean io_req_find_next() fast check
  io_uring: don't check PF_EXITING from syscall
  io_uring: don't split out consume out of SQE get
  io_uring: save ctx put/get for task_work submit
  io_uring: don't duplicate io_req_task_queue()
  io_uring: optimise SQPOLL mm/files grabbing
  io_uring: optimise out unlikely link queue
  io_uring: take compl state from submit state
  io_uring: inline io_complete_rw_common()
  io_uring: move res check out of io_rw_reissue()
  io_uring: simplify iopoll reissuing
  io_uring: clean up io_req_free_batch_finish()
  io_uring: move submit side state closer in the ring
  ...
  • Loading branch information
Linus Torvalds committed Feb 21, 2021
2 parents 9820b4d + 0b81e80 commit 5bbb336
Show file tree
Hide file tree
Showing 10 changed files with 1,431 additions and 1,347 deletions.
5 changes: 5 additions & 0 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -6830,6 +6830,9 @@ F: include/linux/fs.h
F: include/linux/fs_types.h
F: include/uapi/linux/fs.h
F: include/uapi/linux/openat2.h
X: fs/io-wq.c
X: fs/io-wq.h
X: fs/io_uring.c

FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
M: Riku Voipio <riku.voipio@iki.fi>
Expand Down Expand Up @@ -9263,13 +9266,15 @@ F: include/uapi/linux/iommu.h

IO_URING
M: Jens Axboe <axboe@kernel.dk>
R: Pavel Begunkov <asml.silence@gmail.com>
L: io-uring@vger.kernel.org
S: Maintained
T: git git://git.kernel.dk/linux-block
T: git git://git.kernel.dk/liburing
F: fs/io-wq.c
F: fs/io-wq.h
F: fs/io_uring.c
F: include/linux/io_uring.h
F: include/uapi/linux/io_uring.h

IPMI SUBSYSTEM
Expand Down
36 changes: 25 additions & 11 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <linux/close_range.h>
#include <net/sock.h>

#include "internal.h"

unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
/* our min() is unusable in constant expressions ;-/ */
Expand Down Expand Up @@ -732,36 +734,48 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
}

/*
* variant of close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() called on the file, and then
* an fput().
* See close_fd_get_file() below, this variant assumes current->files->file_lock
* is held.
*/
int close_fd_get_file(unsigned int fd, struct file **res)
int __close_fd_get_file(unsigned int fd, struct file **res)
{
struct files_struct *files = current->files;
struct file *file;
struct fdtable *fdt;

spin_lock(&files->file_lock);
fdt = files_fdtable(files);
if (fd >= fdt->max_fds)
goto out_unlock;
goto out_err;
file = fdt->fd[fd];
if (!file)
goto out_unlock;
goto out_err;
rcu_assign_pointer(fdt->fd[fd], NULL);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
get_file(file);
*res = file;
return 0;

out_unlock:
spin_unlock(&files->file_lock);
out_err:
*res = NULL;
return -ENOENT;
}

/*
* variant of close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() called on the file, and then
* an fput().
*/
int close_fd_get_file(unsigned int fd, struct file **res)
{
struct files_struct *files = current->files;
int ret;

spin_lock(&files->file_lock);
ret = __close_fd_get_file(fd, res);
spin_unlock(&files->file_lock);

return ret;
}

void do_close_on_exec(struct files_struct *files)
{
unsigned i;
Expand Down
1 change: 1 addition & 0 deletions fs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);
extern int __close_fd_get_file(unsigned int fd, struct file **res);

long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
int chmod_common(const struct path *path, umode_t mode);
Expand Down
31 changes: 12 additions & 19 deletions fs/io-wq.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ struct io_worker {
#endif
const struct cred *cur_creds;
const struct cred *saved_creds;
struct files_struct *restore_files;
struct nsproxy *restore_nsproxy;
struct fs_struct *restore_fs;
};

#if BITS_PER_LONG == 64
Expand Down Expand Up @@ -156,19 +154,19 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
worker->cur_creds = worker->saved_creds = NULL;
}

if (current->files != worker->restore_files) {
if (current->files) {
__acquire(&wqe->lock);
raw_spin_unlock_irq(&wqe->lock);
dropped_lock = true;

task_lock(current);
current->files = worker->restore_files;
current->files = NULL;
current->nsproxy = worker->restore_nsproxy;
task_unlock(current);
}

if (current->fs != worker->restore_fs)
current->fs = worker->restore_fs;
if (current->fs)
current->fs = NULL;

/*
* If we have an active mm, we need to drop the wq lock before unusing
Expand Down Expand Up @@ -329,11 +327,11 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
allow_kernel_signal(SIGINT);

current->flags |= PF_IO_WORKER;
current->fs = NULL;
current->files = NULL;

worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
worker->restore_files = current->files;
worker->restore_nsproxy = current->nsproxy;
worker->restore_fs = current->fs;
io_wqe_inc_running(wqe, worker);
}

Expand Down Expand Up @@ -555,23 +553,21 @@ static void io_worker_handle_work(struct io_worker *worker)

/* handle a whole dependent link */
do {
struct io_wq_work *old_work, *next_hashed, *linked;
struct io_wq_work *next_hashed, *linked;
unsigned int hash = io_get_work_hash(work);

next_hashed = wq_next_work(work);
io_impersonate_work(worker, work);
wq->do_work(work);
io_assign_current_work(worker, NULL);

old_work = work;
linked = wq->do_work(work);

linked = wq->free_work(work);
work = next_hashed;
if (!work && linked && !io_wq_is_hashed(linked)) {
work = linked;
linked = NULL;
}
io_assign_current_work(worker, work);
wq->free_work(old_work);

if (linked)
io_wqe_enqueue(wqe, linked);

Expand Down Expand Up @@ -850,11 +846,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
struct io_wq *wq = wqe->wq;

do {
struct io_wq_work *old_work = work;

work->flags |= IO_WQ_WORK_CANCEL;
work = wq->do_work(work);
wq->free_work(old_work);
wq->do_work(work);
work = wq->free_work(work);
} while (work);
}

Expand Down Expand Up @@ -944,7 +938,6 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
*/
spin_lock_irqsave(&worker->lock, flags);
if (worker->cur_work &&
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
match->fn(worker->cur_work, match->data)) {
send_sig(SIGINT, worker->task, 1);
match->nr_running++;
Expand Down
14 changes: 2 additions & 12 deletions fs/io-wq.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ enum {
IO_WQ_WORK_CANCEL = 1,
IO_WQ_WORK_HASHED = 2,
IO_WQ_WORK_UNBOUND = 4,
IO_WQ_WORK_NO_CANCEL = 8,
IO_WQ_WORK_CONCURRENT = 16,

IO_WQ_WORK_FILES = 32,
Expand All @@ -28,15 +27,6 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};

struct io_wq_work_node {
struct io_wq_work_node *next;
};

struct io_wq_work_list {
struct io_wq_work_node *first;
struct io_wq_work_node *last;
};

static inline void wq_list_add_after(struct io_wq_work_node *node,
struct io_wq_work_node *pos,
struct io_wq_work_list *list)
Expand Down Expand Up @@ -107,8 +97,8 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
return container_of(work->list.next, struct io_wq_work, list);
}

typedef void (free_work_fn)(struct io_wq_work *);
typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work *);

struct io_wq_data {
struct user_struct *user;
Expand Down
Loading

0 comments on commit 5bbb336

Please sign in to comment.