Skip to content

Commit

Permalink
vfs: fix dentry RCU to refcounting possibly sleeping dput()
Browse files Browse the repository at this point in the history
This is the fix that the last two commits indirectly led up to - making
sure that we don't call dput() in a bad context on the dentries we've
looked up in RCU mode after the sequence count validation fails.

This basically expands d_rcu_to_refcount() into the callers, and then
fixes the callers to delay the dput() in the failure case until _after_
we've dropped all locks and are no longer in an RCU-locked region.

The case of 'complete_walk()' was trivial, since its failure case did
the unlock_rcu_walk() directly after the call to d_rcu_to_refcount(),
and as such that is just a pure expansion of the function with a trivial
movement of the resulting dput() to after 'unlock_rcu_walk()'.

In contrast, the unlazy_walk() case was much more complicated, because
not only does convert two different dentries from RCU to be reference
counted, but it used to not call unlock_rcu_walk() at all, and instead
just returned an error and let the caller clean everything up in
"terminate_walk()".

Happily, one of the dentries in question (called "parent" inside
unlazy_walk()) is the dentry of "nd->path", which terminate_walk() wants
a refcount to anyway for the non-RCU case.

So what the new and improved unlazy_walk() does is to first turn that
dentry into a refcounted one, and once that is set up, the error cases
can continue to use the terminate_walk() helper for cleanup, but for the
non-RCU case.  Which makes it possible to drop out of RCU mode if we
actually hit the sequence number failure case.

Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Linus Torvalds committed Sep 9, 2013
1 parent 0d98439 commit e5c832d
Showing 1 changed file with 49 additions and 53 deletions.
102 changes: 49 additions & 53 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -494,37 +494,6 @@ static inline void unlock_rcu_walk(void)
br_read_unlock(&vfsmount_lock);
}

/*
* When we move over from the RCU domain to properly refcounted
* long-lived dentries, we need to check the sequence numbers
* we got before lookup very carefully.
*
* We cannot blindly increment a dentry refcount - even if it
* is not locked - if it is zero, because it may have gone
* through the final d_kill() logic already.
*
* So for a zero refcount, we need to get the spinlock (which is
* safe even for a dead dentry because the de-allocation is
* RCU-delayed), and check the sequence count under the lock.
*
* Once we have checked the sequence count, we know it is live,
* and since we hold the spinlock it cannot die from under us.
*
* In contrast, if the reference count wasn't zero, we can just
* increment the lockref without having to take the spinlock.
* Even if the sequence number ends up being stale, we haven't
* gone through the final dput() and killed the dentry yet.
*/
static inline int d_rcu_to_refcount(struct dentry *dentry, seqcount_t *validate, unsigned seq)
{
if (likely(lockref_get_not_dead(&dentry->d_lockref))) {
if (!read_seqcount_retry(validate, seq))
return 0;
dput(dentry);
}
return -ECHILD;
}

/**
* unlazy_walk - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
Expand All @@ -539,16 +508,29 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
{
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
int want_root = 0;

BUG_ON(!(nd->flags & LOOKUP_RCU));
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
want_root = 1;
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt ||
nd->root.dentry != fs->root.dentry)
goto err_root;
}

/*
* Get a reference to the parent first: we're
* going to make "path_put(nd->path)" valid in
* non-RCU context for "terminate_walk()".
*
* If this doesn't work, return immediately with
* RCU walking still active (and then we will do
* the RCU walk cleanup in terminate_walk()).
*/
if (!lockref_get_not_dead(&parent->d_lockref))
return -ECHILD;

/*
* After the mntget(), we terminate_walk() will do
* the right thing for non-RCU mode, and all our
* subsequent exit cases should unlock_rcu_walk()
* before returning.
*/
mntget(nd->path.mnt);
nd->flags &= ~LOOKUP_RCU;

/*
* For a negative lookup, the lookup sequence point is the parents
Expand All @@ -562,30 +544,39 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
* be valid if the child sequence number is still valid.
*/
if (!dentry) {
if (d_rcu_to_refcount(parent, &parent->d_seq, nd->seq) < 0)
goto err_root;
if (read_seqcount_retry(&parent->d_seq, nd->seq))
goto out;
BUG_ON(nd->inode != parent->d_inode);
} else {
if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0)
goto err_root;
if (d_rcu_to_refcount(parent, &dentry->d_seq, nd->seq) < 0)
goto err_parent;
if (!lockref_get_not_dead(&dentry->d_lockref))
goto out;
if (read_seqcount_retry(&dentry->d_seq, nd->seq))
goto drop_dentry;
}
if (want_root) {

/*
* Sequence counts matched. Now make sure that the root is
* still valid and get it if required.
*/
if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
spin_lock(&fs->lock);
if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry)
goto unlock_and_drop_dentry;
path_get(&nd->root);
spin_unlock(&fs->lock);
}
mntget(nd->path.mnt);

unlock_rcu_walk();
nd->flags &= ~LOOKUP_RCU;
return 0;

err_parent:
unlock_and_drop_dentry:
spin_unlock(&fs->lock);
drop_dentry:
unlock_rcu_walk();
dput(dentry);
err_root:
if (want_root)
spin_unlock(&fs->lock);
return -ECHILD;
out:
unlock_rcu_walk();
return -ECHILD;
}

Expand Down Expand Up @@ -614,10 +605,15 @@ static int complete_walk(struct nameidata *nd)
if (!(nd->flags & LOOKUP_ROOT))
nd->root.mnt = NULL;

if (d_rcu_to_refcount(dentry, &dentry->d_seq, nd->seq) < 0) {
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
unlock_rcu_walk();
return -ECHILD;
}
if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
unlock_rcu_walk();
dput(dentry);
return -ECHILD;
}
mntget(nd->path.mnt);
unlock_rcu_walk();
}
Expand Down

0 comments on commit e5c832d

Please sign in to comment.