Skip to content

Commit

Permalink
Merge branch 'work.epoll' of git://git.kernel.org/pub/scm/linux/kerne…
Browse files Browse the repository at this point in the history
…l/git/viro/vfs

Pull epoll fixes from Al Viro:
 "Several race fixes in epoll"

* 'work.epoll' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ep_create_wakeup_source(): dentry name can change under you...
  epoll: EPOLL_CTL_ADD: close the race in decision to take fast path
  epoll: replace ->visited/visited_list with generation count
  epoll: do not insert into poll queues until all sanity checks are done
  • Loading branch information
Linus Torvalds committed Oct 2, 2020
2 parents db23baa + 3701cb5 commit d4fce2e
Showing 1 changed file with 31 additions and 41 deletions.
72 changes: 31 additions & 41 deletions fs/eventpoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,7 @@ struct eventpoll {
struct file *file;

/* used to optimize loop detection check */
struct list_head visited_list_link;
int visited;
u64 gen;

#ifdef CONFIG_NET_RX_BUSY_POLL
/* used to track busy poll napi_id */
Expand Down Expand Up @@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
*/
static DEFINE_MUTEX(epmutex);

static u64 loop_check_gen = 0;

/* Used to check for epoll file descriptor inclusion loops */
static struct nested_calls poll_loop_ncalls;

Expand All @@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;

/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
static LIST_HEAD(visited_list);

/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
Expand Down Expand Up @@ -1450,7 +1448,7 @@ static int reverse_path_check(void)

static int ep_create_wakeup_source(struct epitem *epi)
{
const char *name;
struct name_snapshot n;
struct wakeup_source *ws;

if (!epi->ep->ws) {
Expand All @@ -1459,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
return -ENOMEM;
}

name = epi->ffd.file->f_path.dentry->d_name.name;
ws = wakeup_source_register(NULL, name);
take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
ws = wakeup_source_register(NULL, n.name.name);
release_dentry_name_snapshot(&n);

if (!ws)
return -ENOMEM;
Expand Down Expand Up @@ -1522,6 +1521,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
RCU_INIT_POINTER(epi->ws, NULL);
}

/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);

/*
* Add the current item to the RB tree. All RB tree operations are
* protected by "mtx", and ep_insert() is called with "mtx" held.
*/
ep_rbtree_insert(ep, epi);

/* now check if we've created too many backpaths */
error = -EINVAL;
if (full_check && reverse_path_check())
goto error_remove_epi;

/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
Expand All @@ -1544,22 +1559,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (epi->nwait < 0)
goto error_unregister;

/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);

/*
* Add the current item to the RB tree. All RB tree operations are
* protected by "mtx", and ep_insert() is called with "mtx" held.
*/
ep_rbtree_insert(ep, epi);

/* now check if we've created too many backpaths */
error = -EINVAL;
if (full_check && reverse_path_check())
goto error_remove_epi;

/* We have to drop the new item inside our item list to keep track of it */
write_lock_irq(&ep->lock);

Expand Down Expand Up @@ -1588,16 +1587,15 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,

return 0;

error_unregister:
ep_unregister_pollwait(ep, epi);
error_remove_epi:
spin_lock(&tfile->f_lock);
list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);

rb_erase_cached(&epi->rbn, &ep->rbr);

error_unregister:
ep_unregister_pollwait(ep, epi);

/*
* We need to do this because an event could have been arrived on some
* allocated wait queue. Note that we don't care about the ep->ovflist
Expand Down Expand Up @@ -1972,13 +1970,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
struct epitem *epi;

mutex_lock_nested(&ep->mtx, call_nests + 1);
ep->visited = 1;
list_add(&ep->visited_list_link, &visited_list);
ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit->visited)
if (ep_tovisit->gen == loop_check_gen)
continue;
error = ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, epi->ffd.file,
Expand Down Expand Up @@ -2019,18 +2016,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
int ret;
struct eventpoll *ep_cur, *ep_next;

ret = ep_call_nested(&poll_loop_ncalls,
return ep_call_nested(&poll_loop_ncalls,
ep_loop_check_proc, file, ep, current);
/* clear visited list */
list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
visited_list_link) {
ep_cur->visited = 0;
list_del(&ep_cur->visited_list_link);
}
return ret;
}

static void clear_tfile_check_list(void)
Expand Down Expand Up @@ -2195,11 +2182,13 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
goto error_tgt_fput;
if (op == EPOLL_CTL_ADD) {
if (!list_empty(&f.file->f_ep_links) ||
ep->gen == loop_check_gen ||
is_file_epoll(tf.file)) {
mutex_unlock(&ep->mtx);
error = epoll_mutex_lock(&epmutex, 0, nonblock);
if (error)
goto error_tgt_fput;
loop_check_gen++;
full_check = 1;
if (is_file_epoll(tf.file)) {
error = -ELOOP;
Expand Down Expand Up @@ -2263,6 +2252,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
error_tgt_fput:
if (full_check) {
clear_tfile_check_list();
loop_check_gen++;
mutex_unlock(&epmutex);
}

Expand Down

0 comments on commit d4fce2e

Please sign in to comment.