Skip to content

Commit

Permalink
rename(): avoid a deadlock in the case of parents having no common an…
Browse files Browse the repository at this point in the history
…cestor

... and fix the directory locking documentation and proof of correctness.
Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the
case where we really don't want it is splicing the root of disconnected
tree to somewhere.

In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an
ancestor of Y" only if X and Y are already in the same tree.  Otherwise
it can go from false to true, and one can construct a deadlock on that.

Make lock_two_directories() report an error in such case and update the
callers of lock_rename()/lock_rename_child() to handle such errors.

And yes, such conditions are not impossible to create ;-/

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
  • Loading branch information
Al Viro committed Nov 25, 2023
1 parent dbd4540 commit a8b0026
Show file tree
Hide file tree
Showing 11 changed files with 313 additions and 118 deletions.
346 changes: 242 additions & 104 deletions Documentation/filesystems/directory-locking.rst

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions Documentation/filesystems/porting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected
by any locks; just don't do it if the old parent is the same as the new one.
We really can't lock two subdirectories in same-directory rename - not without
deadlocks.

---

**mandatory**

lock_rename() and lock_rename_child() may fail in cross-directory case, if
their arguments do not have a common ancestor. In that case ERR_PTR(-EXDEV)
is returned, with no locks taken. In-tree users updated; out-of-tree ones
would need to do so.
2 changes: 2 additions & 0 deletions fs/cachefiles/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,

/* do the multiway lock magic */
trap = lock_rename(cache->graveyard, dir);
if (IS_ERR(trap))
return PTR_ERR(trap);

/* do some checks before getting the grave dentry */
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
Expand Down
2 changes: 2 additions & 0 deletions fs/ecryptfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
target_inode = d_inode(new_dentry);

trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
if (IS_ERR(trap))
return PTR_ERR(trap);
dget(lower_new_dentry);
rc = -EINVAL;
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
Expand Down
37 changes: 29 additions & 8 deletions fs/namei.c
Original file line number Diff line number Diff line change
Expand Up @@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap,
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
}

// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{
struct dentry *p;
struct dentry *p = p1, *q = p2, *r;

p = d_ancestor(p2, p1);
if (p) {
while ((r = p->d_parent) != p2 && r != p)
p = r;
if (r == p2) {
// p is a child of p2 and an ancestor of p1 or p1 itself
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}

p = d_ancestor(p1, p2);
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return p;
// p is the root of connected component that contains p1
// p2 does not occur on the path from p to p1
while ((r = q->d_parent) != p1 && r != p && r != q)
q = r;
if (r == p1) {
// q is a child of p1 and an ancestor of p2 or p2 itself
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return q;
} else if (likely(r == p)) {
// both p2 and p1 are descendents of p
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
} else { // no common ancestor at the time we'd been called
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
return ERR_PTR(-EXDEV);
}
}

/*
Expand Down Expand Up @@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,

retry_deleg:
trap = lock_rename(new_path.dentry, old_path.dentry);
if (IS_ERR(trap)) {
error = PTR_ERR(trap);
goto exit_lock_rename;
}

old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
lookup_flags);
Expand Down Expand Up @@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
dput(old_dentry);
exit3:
unlock_rename(new_path.dentry, old_path.dentry);
exit_lock_rename:
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
Expand Down
4 changes: 4 additions & 0 deletions fs/nfsd/vfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
}

trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
goto out;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
goto out_unlock;
Expand Down
9 changes: 6 additions & 3 deletions fs/overlayfs/copy_up.c
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper;
struct dentry *temp, *upper, *trap;
struct ovl_cu_creds cc;
int err;
struct ovl_cattr cattr = {
Expand Down Expand Up @@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
* If temp was moved, abort without the cleanup.
*/
ovl_start_write(c->dentry);
if (lock_rename(c->workdir, c->destdir) != NULL ||
temp->d_parent != c->workdir) {
trap = lock_rename(c->workdir, c->destdir);
if (trap || temp->d_parent != c->workdir) {
err = -EIO;
if (IS_ERR(trap))
goto out;
goto unlock;
} else if (err) {
goto cleanup;
Expand Down Expand Up @@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
unlock_rename(c->workdir, c->destdir);
out:
ovl_end_write(c->dentry);

return err;
Expand Down
4 changes: 4 additions & 0 deletions fs/overlayfs/dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}

trap = lock_rename(new_upperdir, old_upperdir);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_revert_creds;
}

olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
old->d_name.len);
Expand Down
6 changes: 4 additions & 2 deletions fs/overlayfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
bool ok = false;

if (workdir != upperdir) {
ok = (lock_rename(workdir, upperdir) == NULL);
unlock_rename(workdir, upperdir);
struct dentry *trap = lock_rename(workdir, upperdir);
if (!IS_ERR(trap))
unlock_rename(workdir, upperdir);
ok = (trap == NULL);
}
return ok;
}
Expand Down
7 changes: 6 additions & 1 deletion fs/overlayfs/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry)

int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
{
struct dentry *trap;

/* Workdir should not be the same as upperdir */
if (workdir == upperdir)
goto err;

/* Workdir should not be subdir of upperdir and vice versa */
if (lock_rename(workdir, upperdir) != NULL)
trap = lock_rename(workdir, upperdir);
if (IS_ERR(trap))
goto err;
if (trap)
goto err_unlock;

return 0;
Expand Down
5 changes: 5 additions & 0 deletions fs/smb/server/vfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,10 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out2;

trap = lock_rename_child(old_child, new_path.dentry);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_drop_write;
}

old_parent = dget(old_child->d_parent);
if (d_unhashed(old_child)) {
Expand Down Expand Up @@ -770,6 +774,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
out3:
dput(old_parent);
unlock_rename(old_parent, new_path.dentry);
out_drop_write:
mnt_drop_write(old_path->mnt);
out2:
path_put(&new_path);
Expand Down

0 comments on commit a8b0026

Please sign in to comment.