Skip to content

Commit

Permalink
fs/file.c: don't acquire files->file_lock in fd_install()
Browse files Browse the repository at this point in the history
Mateusz Guzik reported :

 Currently obtaining a new file descriptor results in locking fdtable
 twice - once in order to reserve a slot and second time to fill it.

Holding the spinlock in __fd_install() is needed in case a resize is
done, or to prevent a resize.

Mateusz provided an RFC patch and a micro benchmark :
  http://people.redhat.com/~mguzik/pipebench.c

A resize is an unlikely operation in a process lifetime,
as table size is at least doubled at every resize.

We can use RCU instead of the spinlock.

__fd_install() must wait if a resize is in progress.

The resize must block new __fd_install() callers from starting,
and wait that ongoing install are finished (synchronize_sched())

resize should be attempted by a single thread to not waste resources.

rcu_sched variant is used, as __fd_install() and expand_fdtable() run
from process context.

It gives us a ~30% speedup using pipebench on a dual Intel(R) Xeon(R)
CPU E5-2696 v2 @ 2.50GHz

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Mateusz Guzik <mguzik@redhat.com>
Acked-by: Mateusz Guzik <mguzik@redhat.com>
Tested-by: Mateusz Guzik <mguzik@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Eric Dumazet authored and Al Viro committed Jul 1, 2015
1 parent 1af95de commit 8a81252
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 19 deletions.
4 changes: 4 additions & 0 deletions Documentation/filesystems/porting
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,7 @@ in your dentry operations instead.
dentry, it does not get nameidata at all and it gets called only when cookie
is non-NULL. Note that link body isn't available anymore, so if you need it,
store it as cookie.
--
[mandatory]
__fd_install() & fd_install() can now sleep. Callers should not
hold a spinlock or other resources that do not allow a schedule.
67 changes: 48 additions & 19 deletions fs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ static int expand_fdtable(struct files_struct *files, int nr)

spin_unlock(&files->file_lock);
new_fdt = alloc_fdtable(nr);

/* make sure all __fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
*/
if (atomic_read(&files->count) > 1)
synchronize_sched();

spin_lock(&files->file_lock);
if (!new_fdt)
return -ENOMEM;
Expand All @@ -158,21 +165,14 @@ static int expand_fdtable(struct files_struct *files, int nr)
__free_fdtable(new_fdt);
return -EMFILE;
}
/*
* Check again since another task may have expanded the fd table while
* we dropped the lock
*/
cur_fdt = files_fdtable(files);
if (nr >= cur_fdt->max_fds) {
/* Continue as planned */
copy_fdtable(new_fdt, cur_fdt);
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
} else {
/* Somebody else expanded, so undo our attempt */
__free_fdtable(new_fdt);
}
BUG_ON(nr < cur_fdt->max_fds);
copy_fdtable(new_fdt, cur_fdt);
rcu_assign_pointer(files->fdt, new_fdt);
if (cur_fdt != &files->fdtab)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
/* coupled with smp_rmb() in __fd_install() */
smp_wmb();
return 1;
}

Expand All @@ -185,21 +185,38 @@ static int expand_fdtable(struct files_struct *files, int nr)
* The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_files(struct files_struct *files, int nr)
__releases(files->file_lock)
__acquires(files->file_lock)
{
struct fdtable *fdt;
int expanded = 0;

repeat:
fdt = files_fdtable(files);

/* Do we need to expand? */
if (nr < fdt->max_fds)
return 0;
return expanded;

/* Can we expand? */
if (nr >= sysctl_nr_open)
return -EMFILE;

if (unlikely(files->resize_in_progress)) {
spin_unlock(&files->file_lock);
expanded = 1;
wait_event(files->resize_wait, !files->resize_in_progress);
spin_lock(&files->file_lock);
goto repeat;
}

/* All good, so we try */
return expand_fdtable(files, nr);
files->resize_in_progress = true;
expanded = expand_fdtable(files, nr);
files->resize_in_progress = false;

wake_up_all(&files->resize_wait);
return expanded;
}

static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
Expand Down Expand Up @@ -256,6 +273,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
atomic_set(&newf->count, 1);

spin_lock_init(&newf->file_lock);
newf->resize_in_progress = false;
init_waitqueue_head(&newf->resize_wait);
newf->next_fd = 0;
new_fdt = &newf->fdtab;
new_fdt->max_fds = NR_OPEN_DEFAULT;
Expand Down Expand Up @@ -553,11 +572,21 @@ void __fd_install(struct files_struct *files, unsigned int fd,
struct file *file)
{
struct fdtable *fdt;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);

might_sleep();
rcu_read_lock_sched();

while (unlikely(files->resize_in_progress)) {
rcu_read_unlock_sched();
wait_event(files->resize_wait, !files->resize_in_progress);
rcu_read_lock_sched();
}
/* coupled with smp_wmb() in expand_fdtable() */
smp_rmb();
fdt = rcu_dereference_sched(files->fdt);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
rcu_read_unlock_sched();
}

void fd_install(unsigned int fd, struct file *file)
Expand Down
3 changes: 3 additions & 0 deletions include/linux/fdtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ struct files_struct {
* read mostly part
*/
atomic_t count;
bool resize_in_progress;
wait_queue_head_t resize_wait;

struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
Expand Down

0 comments on commit 8a81252

Please sign in to comment.