Skip to content

Commit

Permalink
Merge tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/li…
Browse files Browse the repository at this point in the history
…nux-block

Pull io_uring updates from Jens Axboe:

 - Support for various new opcodes (fallocate, openat, close, statx,
   fadvise, madvise, openat2, non-vectored read/write, send/recv, and
   epoll_ctl)

 - Faster ring quiesce for fileset updates

 - Optimizations for overflow condition checking

 - Support for max-sized clamping

 - Support for probing what opcodes are supported

 - Support for io-wq backend sharing between "sibling" rings

 - Support for registering personalities

 - Lots of little fixes and improvements

* tag 'for-5.6/io_uring-vfs-2020-01-29' of git://git.kernel.dk/linux-block: (64 commits)
  io_uring: add support for epoll_ctl(2)
  eventpoll: support non-blocking do_epoll_ctl() calls
  eventpoll: abstract out epoll_ctl() handler
  io_uring: fix linked command file table usage
  io_uring: support using a registered personality for commands
  io_uring: allow registering credentials
  io_uring: add io-wq workqueue sharing
  io-wq: allow grabbing existing io-wq
  io_uring/io-wq: don't use static creds/mm assignments
  io-wq: make the io_wq ref counted
  io_uring: fix refcounting with batched allocations at OOM
  io_uring: add comment for drain_next
  io_uring: don't attempt to copy iovec for READ/WRITE
  io_uring: honor IOSQE_ASYNC for linked reqs
  io_uring: prep req when do IOSQE_ASYNC
  io_uring: use labeled array init in io_op_defs
  io_uring: optimise sqe-to-req flags translation
  io_uring: remove REQ_F_IO_DRAINED
  io_uring: file switch work needs to get flushed on exit
  io_uring: hide uring_fd in ctx
  ...
  • Loading branch information
Linus Torvalds committed Jan 30, 2020
2 parents 33c84e8 + 3e4827b commit 896f8d2
Show file tree
Hide file tree
Showing 15 changed files with 2,218 additions and 583 deletions.
6 changes: 4 additions & 2 deletions drivers/android/binder.c
Original file line number Diff line number Diff line change
Expand Up @@ -2249,10 +2249,12 @@ static void binder_deferred_fd_close(int fd)
return;
init_task_work(&twcb->twork, binder_do_fd_close);
__close_fd_get_file(fd, &twcb->file);
if (twcb->file)
if (twcb->file) {
filp_close(twcb->file, current->files);
task_work_add(current, &twcb->twork, true);
else
} else {
kfree(twcb);
}
}

static void binder_transaction_buffer_release(struct binder_proc *proc,
Expand Down
87 changes: 56 additions & 31 deletions fs/eventpoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -354,12 +354,6 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
return container_of(p, struct ep_pqueue, pt)->epi;
}

/*
 * Report whether an epoll_ctl(2) operation comes with an event structure
 * that has to be copied in from userspace.
 *
 * Returns 0 for EPOLL_CTL_DEL and 1 for every other value of @op.
 */
static inline int ep_op_has_event(int op)
{
	if (op == EPOLL_CTL_DEL)
		return 0;

	return 1;
}

/* Initialize the poll safe wake up structure */
static void ep_nested_calls_init(struct nested_calls *ncalls)
{
Expand Down Expand Up @@ -2074,27 +2068,28 @@ SYSCALL_DEFINE1(epoll_create, int, size)
return do_epoll_create(0);
}

/*
* The following function implements the controller interface for
* the eventpoll file that enables the insertion/removal/change of
* file descriptors inside the interest set.
*/
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
/*
 * Take @mutex either by blocking or by a single non-blocking attempt.
 *
 * @mutex:    the mutex to acquire
 * @depth:    forwarded to mutex_lock_nested(); only used on the blocking path
 * @nonblock: when true, only mutex_trylock() is attempted
 *
 * Returns 0 once the mutex is held. In non-blocking mode, returns -EAGAIN
 * if the trylock did not succeed, in which case the mutex is NOT held.
 */
static inline int epoll_mutex_lock(struct mutex *mutex, int depth,
				   bool nonblock)
{
	if (nonblock)
		return mutex_trylock(mutex) ? 0 : -EAGAIN;

	mutex_lock_nested(mutex, depth);
	return 0;
}

int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
bool nonblock)
{
int error;
int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
struct eventpoll *tep = NULL;

error = -EFAULT;
if (ep_op_has_event(op) &&
copy_from_user(&epds, event, sizeof(struct epoll_event)))
goto error_return;

error = -EBADF;
f = fdget(epfd);
if (!f.file)
Expand All @@ -2112,7 +2107,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,

/* Check if EPOLLWAKEUP is allowed */
if (ep_op_has_event(op))
ep_take_care_of_epollwakeup(&epds);
ep_take_care_of_epollwakeup(epds);

/*
* We have to check that the file structure underneath the file descriptor
Expand All @@ -2128,11 +2123,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* so EPOLLEXCLUSIVE is not allowed for an EPOLL_CTL_MOD operation.
* Also, we do not currently support nested exclusive wakeups.
*/
if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
if (ep_op_has_event(op) && (epds->events & EPOLLEXCLUSIVE)) {
if (op == EPOLL_CTL_MOD)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
(epds->events & ~EPOLLEXCLUSIVE_OK_BITS)))
goto error_tgt_fput;
}

Expand All @@ -2157,13 +2152,17 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* deep wakeup paths from forming in parallel through multiple
* EPOLL_CTL_ADD operations.
*/
mutex_lock_nested(&ep->mtx, 0);
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error)
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
is_file_epoll(tf.file)) {
full_check = 1;
mutex_unlock(&ep->mtx);
mutex_lock(&epmutex);
error = epoll_mutex_lock(&epmutex, 0, nonblock);
if (error)
goto error_tgt_fput;
full_check = 1;
if (is_file_epoll(tf.file)) {
error = -ELOOP;
if (ep_loop_check(ep, tf.file) != 0) {
Expand All @@ -2173,10 +2172,19 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
} else
list_add(&tf.file->f_tfile_llink,
&tfile_check_list);
mutex_lock_nested(&ep->mtx, 0);
error = epoll_mutex_lock(&ep->mtx, 0, nonblock);
if (error) {
out_del:
list_del(&tf.file->f_tfile_llink);
goto error_tgt_fput;
}
if (is_file_epoll(tf.file)) {
tep = tf.file->private_data;
mutex_lock_nested(&tep->mtx, 1);
error = epoll_mutex_lock(&tep->mtx, 1, nonblock);
if (error) {
mutex_unlock(&ep->mtx);
goto out_del;
}
}
}
}
Expand All @@ -2192,8 +2200,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
switch (op) {
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= EPOLLERR | EPOLLHUP;
error = ep_insert(ep, &epds, tf.file, fd, full_check);
epds->events |= EPOLLERR | EPOLLHUP;
error = ep_insert(ep, epds, tf.file, fd, full_check);
} else
error = -EEXIST;
if (full_check)
Expand All @@ -2208,8 +2216,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_MOD:
if (epi) {
if (!(epi->event.events & EPOLLEXCLUSIVE)) {
epds.events |= EPOLLERR | EPOLLHUP;
error = ep_modify(ep, epi, &epds);
epds->events |= EPOLLERR | EPOLLHUP;
error = ep_modify(ep, epi, epds);
}
} else
error = -ENOENT;
Expand All @@ -2231,6 +2239,23 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
return error;
}

/*
 * epoll_ctl(2) system call entry point: the controller interface of the
 * eventpoll file, used to insert, remove or change file descriptors in the
 * interest set.
 *
 * The user-supplied event is copied into a local structure only when
 * ep_op_has_event() says the operation needs one; a failed copy yields
 * -EFAULT. The request is then handed to do_epoll_ctl() with nonblock set
 * to false, and its result is returned.
 */
SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
		struct epoll_event __user *, event)
{
	struct epoll_event epds;

	if (ep_op_has_event(op)) {
		if (copy_from_user(&epds, event, sizeof(epds)))
			return -EFAULT;
	}

	return do_epoll_ctl(epfd, op, fd, &epds, false);
}

/*
* Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_wait(2).
Expand Down
6 changes: 4 additions & 2 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,9 @@ int __close_fd(struct files_struct *files, unsigned fd)
EXPORT_SYMBOL(__close_fd); /* for ksys_close() */

/*
* variant of __close_fd that gets a ref on the file for later fput
* variant of __close_fd that gets a ref on the file for later fput.
* The caller must ensure that filp_close() is called on the file, followed
* by an fput().
*/
int __close_fd_get_file(unsigned int fd, struct file **res)
{
Expand All @@ -662,7 +664,7 @@ int __close_fd_get_file(unsigned int fd, struct file **res)
spin_unlock(&files->file_lock);
get_file(file);
*res = file;
return filp_close(file, files);
return 0;

out_unlock:
spin_unlock(&files->file_lock);
Expand Down
8 changes: 8 additions & 0 deletions fs/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ extern struct file *do_filp_open(int dfd, struct filename *pathname,
const struct open_flags *op);
extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
const char *, const struct open_flags *);
extern struct open_how build_open_how(int flags, umode_t mode);
extern int build_open_flags(const struct open_how *how, struct open_flags *op);

long do_sys_ftruncate(unsigned int fd, loff_t length, int small);
long do_faccessat(int dfd, const char __user *filename, int mode);
Expand Down Expand Up @@ -182,3 +184,9 @@ extern const struct dentry_operations ns_dentry_operations;

/* direct-io.c: */
int sb_init_dio_done_wq(struct super_block *sb);

/*
* fs/stat.c:
*/
unsigned vfs_stat_set_lookup_flags(unsigned *lookup_flags, int flags);
int cp_statx(const struct kstat *stat, struct statx __user *buffer);
Loading

0 comments on commit 896f8d2

Please sign in to comment.