Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 284359
b: refs/heads/master
c: 28d82dc
h: refs/heads/master
i:
  284357: e353d69
  284355: 55953f8
  284351: 80a39ad
v: v3
  • Loading branch information
Jason Baron authored and Linus Torvalds committed Jan 13, 2012
1 parent 3b1b6f0 commit ab85276
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 26 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 2ccd4f4d4737b37e21dd92c8c584c23cd87740a2
refs/heads/master: 28d82dc1c4edbc352129f97f4ca22624d1fe61de
234 changes: 209 additions & 25 deletions trunk/fs/eventpoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ struct eventpoll {

/* The user that created the eventpoll descriptor */
struct user_struct *user;

struct file *file;

/* used to optimize loop detection check */
int visited;
struct list_head visited_list_link;
};

/* Wait structure used by the poll hooks */
Expand Down Expand Up @@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;

/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
static LIST_HEAD(visited_list);

/*
* List of files with newly added links, where we may need to limit the number
* of emanating paths. Protected by the epmutex.
*/
static LIST_HEAD(tfile_check_list);

#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>
Expand All @@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
};
#endif /* CONFIG_SYSCTL */

static const struct file_operations eventpoll_fops;

static inline int is_file_epoll(struct file *f)
{
return f->f_op == &eventpoll_fops;
}

/* Setup the structure that is used as key for the RB tree */
static inline void ep_set_ffd(struct epoll_filefd *ffd,
Expand Down Expand Up @@ -711,12 +732,6 @@ static const struct file_operations eventpoll_fops = {
.llseek = noop_llseek,
};

/* Fast test to see if the file is an eventpoll file */
static inline int is_file_epoll(struct file *f)
{
return f->f_op == &eventpoll_fops;
}

/*
* This is called from eventpoll_release() to unlink files from the eventpoll
* interface. We need to have this facility to cleanup correctly files that are
Expand Down Expand Up @@ -926,6 +941,99 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}



#define PATH_ARR_SIZE 5
/*
* These are the number paths of length 1 to 5, that we are allowing to emanate
* from a single file of interest. For example, we allow 1000 paths of length
* 1, to emanate from each file of interest. This essentially represents the
* potential wakeup paths, which need to be limited in order to avoid massive
* uncontrolled wakeup storms. The common use case should be a single ep which
* is connected to n file sources. In this case each file source has 1 path
* of length 1. Thus, the numbers below should be more than sufficient. These
* path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
* and delete can't add additional paths. Protected by the epmutex.
*/
static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
static int path_count[PATH_ARR_SIZE];

static int path_count_inc(int nests)
{
if (++path_count[nests] > path_limits[nests])
return -1;
return 0;
}

static void path_count_init(void)
{
int i;

for (i = 0; i < PATH_ARR_SIZE; i++)
path_count[i] = 0;
}

static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
{
int error = 0;
struct file *file = priv;
struct file *child_file;
struct epitem *epi;

list_for_each_entry(epi, &file->f_ep_links, fllink) {
child_file = epi->ep->file;
if (is_file_epoll(child_file)) {
if (list_empty(&child_file->f_ep_links)) {
if (path_count_inc(call_nests)) {
error = -1;
break;
}
} else {
error = ep_call_nested(&poll_loop_ncalls,
EP_MAX_NESTS,
reverse_path_check_proc,
child_file, child_file,
current);
}
if (error != 0)
break;
} else {
printk(KERN_ERR "reverse_path_check_proc: "
"file is not an ep!\n");
}
}
return error;
}

/**
* reverse_path_check - The tfile_check_list is list of file *, which have
* links that are proposed to be newly added. We need to
* make sure that those added links don't add too many
* paths such that we will spend all our time waking up
* eventpoll objects.
*
* Returns: Returns zero if the proposed links don't create too many paths,
* -1 otherwise.
*/
static int reverse_path_check(void)
{
int length = 0;
int error = 0;
struct file *current_file;

/* let's call this for all tfiles */
list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
length++;
path_count_init();
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
reverse_path_check_proc, current_file,
current_file, current);
if (error)
break;
}
return error;
}

/*
* Must be called with "mtx" held.
*/
Expand Down Expand Up @@ -987,6 +1095,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
*/
ep_rbtree_insert(ep, epi);

/* now check if we've created too many backpaths */
error = -EINVAL;
if (reverse_path_check())
goto error_remove_epi;

/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);

Expand All @@ -1011,6 +1124,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,

return 0;

error_remove_epi:
spin_lock(&tfile->f_lock);
if (ep_is_linked(&epi->fllink))
list_del_init(&epi->fllink);
spin_unlock(&tfile->f_lock);

rb_erase(&epi->rbn, &ep->rbr);

error_unregister:
ep_unregister_pollwait(ep, epi);

Expand Down Expand Up @@ -1275,18 +1396,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
int error = 0;
struct file *file = priv;
struct eventpoll *ep = file->private_data;
struct eventpoll *ep_tovisit;
struct rb_node *rbp;
struct epitem *epi;

mutex_lock_nested(&ep->mtx, call_nests + 1);
ep->visited = 1;
list_add(&ep->visited_list_link, &visited_list);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
if (ep_tovisit->visited)
continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, epi->ffd.file,
epi->ffd.file->private_data, current);
ep_loop_check_proc, epi->ffd.file,
ep_tovisit, current);
if (error != 0)
break;
} else {
/*
* If we've reached a file that is not associated with
* an ep, then we need to check if the newly added
* links are going to add too many wakeup paths. We do
* this by adding it to the tfile_check_list, if it's
* not already there, and calling reverse_path_check()
* during ep_insert().
*/
if (list_empty(&epi->ffd.file->f_tfile_llink))
list_add(&epi->ffd.file->f_tfile_llink,
&tfile_check_list);
}
}
mutex_unlock(&ep->mtx);
Expand All @@ -1307,17 +1446,41 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
int ret;
struct eventpoll *ep_cur, *ep_next;

ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
/* clear visited list */
list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
visited_list_link) {
ep_cur->visited = 0;
list_del(&ep_cur->visited_list_link);
}
return ret;
}

static void clear_tfile_check_list(void)
{
struct file *file;

/* first clear the tfile_check_list */
while (!list_empty(&tfile_check_list)) {
file = list_first_entry(&tfile_check_list, struct file,
f_tfile_llink);
list_del_init(&file->f_tfile_llink);
}
INIT_LIST_HEAD(&tfile_check_list);
}

/*
* Open an eventpoll file descriptor.
*/
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
int error;
int error, fd;
struct eventpoll *ep = NULL;
struct file *file;

/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
Expand All @@ -1334,11 +1497,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
*/
error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
if (fd < 0) {
error = fd;
goto out_free_ep;
}
file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
O_RDWR | (flags & O_CLOEXEC));
if (error < 0)
ep_free(ep);

if (IS_ERR(file)) {
error = PTR_ERR(file);
goto out_free_fd;
}
fd_install(fd, file);
ep->file = file;
return fd;

out_free_fd:
put_unused_fd(fd);
out_free_ep:
ep_free(ep);
return error;
}

Expand Down Expand Up @@ -1404,21 +1581,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/*
* When we insert an epoll file descriptor, inside another epoll file
* descriptor, there is the change of creating closed loops, which are
* better be handled here, than in more critical paths.
* better be handled here, than in more critical paths. While we are
* checking for loops we also determine the list of files reachable
* and hang them on the tfile_check_list, so we can check that we
* haven't created too many possible wakeup paths.
*
* We hold epmutex across the loop check and the insert in this case, in
* order to prevent two separate inserts from racing and each doing the
* insert "at the same time" such that ep_loop_check passes on both
* before either one does the insert, thereby creating a cycle.
* We need to hold the epmutex across both ep_insert and ep_remove
* b/c we want to make sure we are looking at a coherent view of
* epoll network.
*/
if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
mutex_lock(&epmutex);
did_lock_epmutex = 1;
error = -ELOOP;
if (ep_loop_check(ep, tfile) != 0)
goto error_tgt_fput;
}

if (op == EPOLL_CTL_ADD) {
if (is_file_epoll(tfile)) {
error = -ELOOP;
if (ep_loop_check(ep, tfile) != 0)
goto error_tgt_fput;
} else
list_add(&tfile->f_tfile_llink, &tfile_check_list);
}

mutex_lock_nested(&ep->mtx, 0);

Expand All @@ -1437,6 +1620,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tfile, fd);
} else
error = -EEXIST;
clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
Expand All @@ -1455,7 +1639,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);

error_tgt_fput:
if (unlikely(did_lock_epmutex))
if (did_lock_epmutex)
mutex_unlock(&epmutex);

fput(tfile);
Expand Down
1 change: 1 addition & 0 deletions trunk/include/linux/eventpoll.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ struct file;
static inline void eventpoll_init_file(struct file *file)
{
INIT_LIST_HEAD(&file->f_ep_links);
INIT_LIST_HEAD(&file->f_tfile_llink);
}


Expand Down
1 change: 1 addition & 0 deletions trunk/include/linux/fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,7 @@ struct file {
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
#ifdef CONFIG_DEBUG_WRITECOUNT
Expand Down

0 comments on commit ab85276

Please sign in to comment.