Skip to content

Commit

Permalink
fs: keep an index of current mount namespaces
Browse files Browse the repository at this point in the history
In order to allow for listmount() to be used on different namespaces we
need a way to lookup a mount ns by its id.  Keep a rbtree of the current
!anonymous mount name spaces indexed by ID that we can use to look up
the namespace.

Co-developed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/e5fdd78a90f5b00a75bd893962a70f52a2c015cd.1719243756.git.josef@toxicpanda.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Josef Bacik authored and Christian Brauner committed Jun 28, 2024
1 parent f3107df commit 1901c92
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 2 deletions.
2 changes: 2 additions & 0 deletions fs/mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;

struct mnt_pcp {
Expand Down
113 changes: 111 additions & 2 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_RWLOCK(mnt_ns_tree_lock);
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */

struct mount_kattr {
unsigned int attr_set;
Expand All @@ -103,6 +105,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);

static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
{
u64 seq_b = ns->seq;

if (seq < seq_b)
return -1;
if (seq > seq_b)
return 1;
return 0;
}

static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
{
if (!node)
return NULL;
return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
}

static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
{
struct mnt_namespace *ns_a = node_to_mnt_ns(a);
struct mnt_namespace *ns_b = node_to_mnt_ns(b);
u64 seq_a = ns_a->seq;

return mnt_ns_cmp(seq_a, ns_b) < 0;
}

static void mnt_ns_tree_add(struct mnt_namespace *ns)
{
guard(write_lock)(&mnt_ns_tree_lock);
rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
}

static void mnt_ns_release(struct mnt_namespace *ns)
{
lockdep_assert_not_held(&mnt_ns_tree_lock);

/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
put_user_ns(ns->user_ns);
kfree(ns);
}
}
DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))

static void mnt_ns_tree_remove(struct mnt_namespace *ns)
{
/* remove from global mount namespace list */
if (!is_anon_ns(ns)) {
guard(write_lock)(&mnt_ns_tree_lock);
rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
}

mnt_ns_release(ns);
}

/*
* Returns the mount namespace which either has the specified id, or has the
* next smallest id afer the specified one.
*/
static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
{
struct rb_node *node = mnt_ns_tree.rb_node;
struct mnt_namespace *ret = NULL;

lockdep_assert_held(&mnt_ns_tree_lock);

while (node) {
struct mnt_namespace *n = node_to_mnt_ns(node);

if (mnt_ns_id <= n->seq) {
ret = node_to_mnt_ns(node);
if (mnt_ns_id == n->seq)
break;
node = node->rb_left;
} else {
node = node->rb_right;
}
}
return ret;
}

/*
* Lookup a mount namespace by id and take a passive reference count. Taking a
* passive reference means the mount namespace can be emptied if e.g., the last
* task holding an active reference exits. To access the mounts of the
* namespace the @namespace_sem must first be acquired. If the namespace has
* already shut down before acquiring @namespace_sem, {list,stat}mount() will
* see that the mount rbtree of the namespace is empty.
*/
static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
{
struct mnt_namespace *ns;

guard(read_lock)(&mnt_ns_tree_lock);
ns = mnt_ns_find_id_at(mnt_ns_id);
if (!ns || ns->seq != mnt_ns_id)
return NULL;

refcount_inc(&ns->passive);
return ns;
}

static inline void lock_mount_hash(void)
{
write_seqlock(&mount_lock);
Expand Down Expand Up @@ -3733,8 +3838,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns))
ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
kfree(ns);
mnt_ns_tree_remove(ns);
}

/*
Expand Down Expand Up @@ -3773,7 +3877,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
Expand Down Expand Up @@ -3850,6 +3956,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
mnt_ns_tree_add(new_ns);
namespace_unlock();

if (rootmnt)
Expand Down Expand Up @@ -5205,6 +5312,8 @@ static void __init init_mount_tree(void)

set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);

mnt_ns_tree_add(ns);
}

void __init mnt_init(void)
Expand Down

0 comments on commit 1901c92

Please sign in to comment.