Skip to content

Commit

Permalink
fs: allow detached mounts in clone_private_mount()
Browse files Browse the repository at this point in the history
In container workloads idmapped mounts are often used as layers for
overlayfs. Recently I added the ability to specify layers in overlayfs
as file descriptors instead of path names. It should be possible to
simply use the detached mounts directly when specifying layers instead
of having to attach them beforehand. They are discarded after overlayfs
is mounted anyway so it's pointless system calls for userspace and
pointless locking for the kernel.

This just recently come up again in [1]. So enable clone_private_mount()
to use detached mounts directly. Following conditions must be met:

- Provided path must be the root of a detached mount tree.
- Provided path may not create mount namespace loops.
- Provided path must be mounted.

It would be possible to be stricter and require that the caller must
have CAP_SYS_ADMIN in the owning user namespace of the anonymous mount
namespace but since this restriction isn't enforced for move_mount()
there's no point in enforcing it for clone_private_mount().

This contains a folded fix for:
Reported-by: syzbot+62dfea789a2cedac1298@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=62dfea789a2cedac1298
provided by Lizhi Xu <lizhi.xu@windriver.com> in [2].

Link: https://lore.kernel.org/r/20250207071331.550952-1-lizhi.xu@windriver.com [2]
Link: https://lore.kernel.org/r/fd8f6574-f737-4743-b220-79c815ee1554@mbaynton.com [1]
Link: https://lore.kernel.org/r/20250123-avancieren-erfreuen-3d61f6588fdd@brauner
Tested-by: Mike Baynton <mike@mbaynton.com>
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Christian Brauner committed Feb 12, 2025
1 parent 29349a3 commit db04662
Showing 1 changed file with 43 additions and 35 deletions.
78 changes: 43 additions & 35 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -2369,6 +2369,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}

/*
* Check that there aren't references to earlier/same mount namespaces in the
* specified subtree. Such references can act as pins for mount namespaces
* that aren't checked by the mount-cycle checking code, thereby allowing
* cycles to be made.
*/
static bool check_for_nsfs_mounts(struct mount *subtree)
{
struct mount *p;
bool ret = false;

lock_mount_hash();
for (p = subtree; p; p = next_mnt(p, subtree))
if (mnt_ns_loop(p->mnt.mnt_root))
goto out;

ret = true;
out:
unlock_mount_hash();
return ret;
}

/**
* clone_private_mount - create a private clone of a path
* @path: path to clone
Expand All @@ -2377,37 +2399,45 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
* will not be attached anywhere in the namespace and will be private (i.e.
* changes to the originating mount won't be propagated into this).
*
* This assumes caller has called or done the equivalent of may_mount().
*
* Release with mntput().
*/
struct vfsmount *clone_private_mount(const struct path *path)
{
struct mount *old_mnt = real_mount(path->mnt);
struct mount *new_mnt;

down_read(&namespace_sem);
scoped_guard(rwsem_read, &namespace_sem)
if (IS_MNT_UNBINDABLE(old_mnt))
goto invalid;
return ERR_PTR(-EINVAL);

if (mnt_has_parent(old_mnt)) {
if (!check_mnt(old_mnt))
return ERR_PTR(-EINVAL);
} else {
if (!is_mounted(&old_mnt->mnt))
return ERR_PTR(-EINVAL);

if (!check_mnt(old_mnt))
goto invalid;
/* Make sure this isn't something purely kernel internal. */
if (!is_anon_ns(old_mnt->mnt_ns))
return ERR_PTR(-EINVAL);

/* Make sure we don't create mount namespace loops. */
if (!check_for_nsfs_mounts(old_mnt))
return ERR_PTR(-EINVAL);
}

if (has_locked_children(old_mnt, path->dentry))
goto invalid;
return ERR_PTR(-EINVAL);

new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
up_read(&namespace_sem);

if (IS_ERR(new_mnt))
return ERR_CAST(new_mnt);
return ERR_PTR(-EINVAL);

/* Longterm mount to be removed by kern_unmount*() */
new_mnt->mnt_ns = MNT_NS_INTERNAL;

return &new_mnt->mnt;

invalid:
up_read(&namespace_sem);
return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(clone_private_mount);

Expand Down Expand Up @@ -3206,28 +3236,6 @@ static inline int tree_contains_unbindable(struct mount *mnt)
return 0;
}

/*
* Check that there aren't references to earlier/same mount namespaces in the
* specified subtree. Such references can act as pins for mount namespaces
* that aren't checked by the mount-cycle checking code, thereby allowing
* cycles to be made.
*/
static bool check_for_nsfs_mounts(struct mount *subtree)
{
struct mount *p;
bool ret = false;

lock_mount_hash();
for (p = subtree; p; p = next_mnt(p, subtree))
if (mnt_ns_loop(p->mnt.mnt_root))
goto out;

ret = true;
out:
unlock_mount_hash();
return ret;
}

static int do_set_group(struct path *from_path, struct path *to_path)
{
struct mount *from, *to;
Expand Down

0 comments on commit db04662

Please sign in to comment.