Skip to content

Commit

Permalink
fs/epoll: use faster rb_first_cached()
Browse files Browse the repository at this point in the history
...  such that we can avoid the tree walks to get the node with the
smallest key.  Semantically the same as the previously used rb_first(),
but O(1).  The main overhead is the extra footprint for the cached rb_node
pointer, which should not matter for epoll.

Link: http://lkml.kernel.org/r/20170719014603.19029-15-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Davidlohr Bueso authored and Linus Torvalds committed Sep 9, 2017
1 parent 410bd5e commit b2ac2ea
Showing 1 changed file with 16 additions and 14 deletions.
30 changes: 16 additions & 14 deletions fs/eventpoll.c
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ struct eventpoll {
struct list_head rdllist;

/* RB tree root used to store monitored fd structs */
struct rb_root rbr;
struct rb_root_cached rbr;

/*
* This is a singly linked list that chains all the "struct epitem" that
Expand Down Expand Up @@ -796,7 +796,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);

rb_erase(&epi->rbn, &ep->rbr);
rb_erase_cached(&epi->rbn, &ep->rbr);

spin_lock_irqsave(&ep->lock, flags);
if (ep_is_linked(&epi->rdllink))
Expand Down Expand Up @@ -840,7 +840,7 @@ static void ep_free(struct eventpoll *ep)
/*
* Walks through the whole tree by unregistering poll callbacks.
*/
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);

ep_unregister_pollwait(ep, epi);
Expand All @@ -856,7 +856,7 @@ static void ep_free(struct eventpoll *ep)
* a lockdep warning.
*/
mutex_lock(&ep->mtx);
while ((rbp = rb_first(&ep->rbr)) != NULL) {
while ((rbp = rb_first_cached(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
cond_resched();
Expand Down Expand Up @@ -963,7 +963,7 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
struct rb_node *rbp;

mutex_lock(&ep->mtx);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
struct inode *inode = file_inode(epi->ffd.file);

Expand Down Expand Up @@ -1040,7 +1040,7 @@ static int ep_alloc(struct eventpoll **pep)
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT;
ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
ep->user = user;

Expand All @@ -1066,7 +1066,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
struct epoll_filefd ffd;

ep_set_ffd(&ffd, file, fd);
for (rbp = ep->rbr.rb_node; rbp; ) {
for (rbp = ep->rbr.rb_root.rb_node; rbp; ) {
epi = rb_entry(rbp, struct epitem, rbn);
kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
if (kcmp > 0)
Expand All @@ -1088,7 +1088,7 @@ static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long t
struct rb_node *rbp;
struct epitem *epi;

for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (epi->ffd.fd == tfd) {
if (toff == 0)
Expand Down Expand Up @@ -1273,20 +1273,22 @@ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
{
int kcmp;
struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL;
struct epitem *epic;
bool leftmost = true;

while (*p) {
parent = *p;
epic = rb_entry(parent, struct epitem, rbn);
kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
if (kcmp > 0)
if (kcmp > 0) {
p = &parent->rb_right;
else
leftmost = false;
} else
p = &parent->rb_left;
}
rb_link_node(&epi->rbn, parent, p);
rb_insert_color(&epi->rbn, &ep->rbr);
rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost);
}


Expand Down Expand Up @@ -1530,7 +1532,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);

rb_erase(&epi->rbn, &ep->rbr);
rb_erase_cached(&epi->rbn, &ep->rbr);

error_unregister:
ep_unregister_pollwait(ep, epi);
Expand Down Expand Up @@ -1878,7 +1880,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
mutex_lock_nested(&ep->mtx, call_nests + 1);
ep->visited = 1;
list_add(&ep->visited_list_link, &visited_list);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
ep_tovisit = epi->ffd.file->private_data;
Expand Down

0 comments on commit b2ac2ea

Please sign in to comment.