Skip to content

Commit

Permalink
Merge patch series "mount notification"
Browse files Browse the repository at this point in the history
Miklos Szeredi <mszeredi@redhat.com> says:

This should be ready for adding to the v6.15 queue.  I don't see the
SELinux discussion converging, so I took the simpler version out of the
two that were suggested.

* patches from https://lore.kernel.org/r/20250129165803.72138-1-mszeredi@redhat.com:
  vfs: add notifications for mount attach and detach
  fanotify: notify on mount attach and detach
  fsnotify: add mount notification infrastructure

Link: https://lore.kernel.org/r/20250129165803.72138-1-mszeredi@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
  • Loading branch information
Christian Brauner committed Feb 5, 2025
2 parents 2014c95 + bf630c4 commit 2cc0b7f
Show file tree
Hide file tree
Showing 14 changed files with 393 additions and 36 deletions.
26 changes: 26 additions & 0 deletions fs/mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <linux/ns_common.h>
#include <linux/fs_pin.h>

extern struct list_head notify_list;

struct mnt_namespace {
struct ns_common ns;
struct mount * root;
Expand All @@ -21,6 +23,10 @@ struct mnt_namespace {
struct rcu_head mnt_ns_rcu;
};
u64 event;
#ifdef CONFIG_FSNOTIFY
__u32 n_fsnotify_mask;
struct fsnotify_mark_connector __rcu *n_fsnotify_marks;
#endif
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
Expand Down Expand Up @@ -76,6 +82,8 @@ struct mount {
#ifdef CONFIG_FSNOTIFY
struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
struct list_head to_notify; /* need to queue notification */
struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */
#endif
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
Expand Down Expand Up @@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}

#ifdef CONFIG_FSNOTIFY
/*
 * Queue @m on notify_list when either its current namespace or its
 * previous namespace has fsnotify marks attached.  Otherwise nothing
 * is queued and prev_ns is simply brought in sync with mnt_ns.
 */
static inline void mnt_notify_add(struct mount *m)
{
	struct mnt_namespace *cur = m->mnt_ns;
	struct mnt_namespace *prev = m->prev_ns;

	/* Fast path: no watches on either namespace */
	if (!(cur && cur->n_fsnotify_marks) &&
	    !(prev && prev->n_fsnotify_marks)) {
		m->prev_ns = cur;
		return;
	}

	list_add_tail(&m->to_notify, &notify_list);
}
#else
/* Without CONFIG_FSNOTIFY there is nothing to queue. */
static inline void mnt_notify_add(struct mount *m)
{
}
#endif

struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
93 changes: 89 additions & 4 deletions fs/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
static DEFINE_SEQLOCK(mnt_ns_tree_lock);

#ifdef CONFIG_FSNOTIFY
LIST_HEAD(notify_list); /* protected by namespace_sem */
#endif
static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */

Expand Down Expand Up @@ -163,6 +166,7 @@ static void mnt_ns_release(struct mnt_namespace *ns)
{
/* keep alive for {list,stat}mount() */
if (refcount_dec_and_test(&ns->passive)) {
fsnotify_mntns_delete(ns);
put_user_ns(ns->user_ns);
kfree(ns);
}
Expand Down Expand Up @@ -1176,6 +1180,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt)
ns->mnt_first_node = &mnt->mnt_node;
rb_link_node(&mnt->mnt_node, parent, link);
rb_insert_color(&mnt->mnt_node, &ns->mounts);

mnt_notify_add(mnt);
}

/*
Expand Down Expand Up @@ -1723,6 +1729,50 @@ int may_umount(struct vfsmount *mnt)

EXPORT_SYMBOL(may_umount);

#ifdef CONFIG_FSNOTIFY
/*
 * Emit the fsnotify event(s) describing how @p changed namespace since
 * the last notification, then remember the current namespace in prev_ns.
 *
 *  - namespace unchanged:      move event on that namespace
 *  - namespace changed:        detach from the old one (if any),
 *                              then attach to the new one (if any)
 */
static void mnt_notify(struct mount *p)
{
	struct mnt_namespace *old_ns = p->prev_ns;
	struct mnt_namespace *new_ns = p->mnt_ns;

	if (old_ns == new_ns) {
		fsnotify_mnt_move(new_ns, &p->mnt);
	} else {
		if (old_ns)
			fsnotify_mnt_detach(old_ns, &p->mnt);
		if (new_ns)
			fsnotify_mnt_attach(new_ns, &p->mnt);
	}

	p->prev_ns = new_ns;
}

/*
 * Drain notify_list: send the pending notification for every queued
 * mount and take it off the list.
 */
static void notify_mnt_list(void)
{
	struct mount *mnt, *next;

	/*
	 * The queued mounts are those that were added, reparented,
	 * detached, or that remain connected after an unmount.
	 */
	list_for_each_entry_safe(mnt, next, &notify_list, to_notify) {
		mnt_notify(mnt);
		list_del_init(&mnt->to_notify);
	}
}

static bool need_notify_mnt_list(void)
{
return !list_empty(&notify_list);
}
#else
/* Stub for builds without CONFIG_FSNOTIFY: there is nothing to deliver. */
static void notify_mnt_list(void)
{
}

/* Stub for builds without CONFIG_FSNOTIFY: never reports pending notifications. */
static bool need_notify_mnt_list(void)
{
return false;
}
#endif

static void namespace_unlock(void)
{
struct hlist_head head;
Expand All @@ -1733,7 +1783,18 @@ static void namespace_unlock(void)
hlist_move_list(&unmounted, &head);
list_splice_init(&ex_mountpoints, &list);

up_write(&namespace_sem);
if (need_notify_mnt_list()) {
/*
* No point blocking out concurrent readers while notifications
* are sent. This will also allow statmount()/listmount() to run
* concurrently.
*/
downgrade_write(&namespace_sem);
notify_mnt_list();
up_read(&namespace_sem);
} else {
up_write(&namespace_sem);
}

shrink_dentry_list(&list);

Expand Down Expand Up @@ -1846,6 +1907,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
change_mnt_propagation(p, MS_PRIVATE);
if (disconnect)
hlist_add_head(&p->mnt_umount, &unmounted);

/*
* At this point p->mnt_ns is NULL, notification will be queued
* only if
*
* - p->prev_ns is non-NULL *and*
* - p->prev_ns->n_fsnotify_marks is non-NULL
*
* This will preclude queuing the mount if this is a cleanup
* after a failed copy_tree() or destruction of an anonymous
* namespace, etc.
*/
mnt_notify_add(p);
}
}

Expand Down Expand Up @@ -2145,16 +2219,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr
}
}

/*
 * Map @dentry to the mount namespace it refers to.
 *
 * Returns NULL when @dentry is not a mount namespace file.
 */
struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry)
{
	if (is_mnt_ns_file(dentry))
		return to_mnt_ns(get_proc_ns(dentry->d_inode));

	return NULL;
}

/*
 * Could bind mounting the mount namespace inode cause a
 * mount namespace loop?
 *
 * Returns false when @dentry is not a mount namespace file.  Otherwise
 * returns true when the current task's mount namespace has a sequence
 * number greater than or equal to that of the namespace behind @dentry.
 */
static bool mnt_ns_loop(struct dentry *dentry)
{
	struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry);

	/* mnt_ns_from_dentry() yields NULL for anything but an mnt ns file */
	if (!mnt_ns)
		return false;

	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}

Expand Down Expand Up @@ -2547,6 +2629,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
dest_mp = smp;
unhash_mnt(source_mnt);
attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
mnt_notify_add(source_mnt);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
if (source_mnt->mnt_ns) {
Expand Down Expand Up @@ -4468,6 +4551,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
list_del_init(&new_mnt->mnt_expire);
put_mountpoint(root_mp);
unlock_mount_hash();
mnt_notify_add(root_mnt);
mnt_notify_add(new_mnt);
chroot_fs_refs(&root, &new);
error = 0;
out4:
Expand Down
38 changes: 35 additions & 3 deletions fs/notify/fanotify/fanotify.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
return fanotify_error_event_equal(FANOTIFY_EE(old),
FANOTIFY_EE(new));
case FANOTIFY_EVENT_TYPE_MNT:
return false;
default:
WARN_ON_ONCE(1);
}
Expand Down Expand Up @@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
__func__, iter_info->report_mask, event_mask, data, data_type);

if (!fid_mode) {
if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) {
if (data_type != FSNOTIFY_EVENT_MNT)
return 0;
} else if (!fid_mode) {
/* Do we have path to open a file descriptor? */
if (!path)
return 0;
Expand Down Expand Up @@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
return &pevent->fae;
}

/*
 * Allocate a mount event from fanotify_mnt_event_cachep and fill in
 * its type and @mnt_id.
 *
 * Returns the embedded fanotify_event, or NULL if the allocation fails.
 */
static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp)
{
	struct fanotify_mnt_event *mevent;

	mevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp);
	if (mevent == NULL)
		return NULL;

	mevent->mnt_id = mnt_id;
	mevent->fae.type = FANOTIFY_EVENT_TYPE_MNT;

	return &mevent->fae;
}

static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
int data_type,
gfp_t gfp)
Expand Down Expand Up @@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event(
fid_mode);
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
u64 mnt_id = fsnotify_data_mnt_id(data, data_type);
struct mem_cgroup *old_memcg;
struct dentry *moved = NULL;
struct inode *child = NULL;
Expand Down Expand Up @@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event(
moved, &hash, gfp);
} else if (fid_mode) {
event = fanotify_alloc_fid_event(id, fsid, &hash, gfp);
} else {
} else if (path) {
event = fanotify_alloc_path_event(path, &hash, gfp);
} else if (mnt_id) {
event = fanotify_alloc_mnt_event(mnt_id, gfp);
} else {
WARN_ON_ONCE(1);
}

if (!event)
Expand Down Expand Up @@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);

BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24);

mask = fanotify_group_event_mask(group, iter_info, &match_mask,
mask, data, data_type, dir);
Expand Down Expand Up @@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group,
mempool_free(fee, &group->fanotify_data.error_events_pool);
}

static void fanotify_free_mnt_event(struct fanotify_event *event)
{
kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event));
}

static void fanotify_free_event(struct fsnotify_group *group,
struct fsnotify_event *fsn_event)
{
Expand All @@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group,
case FANOTIFY_EVENT_TYPE_FS_ERROR:
fanotify_free_error_event(group, event);
break;
case FANOTIFY_EVENT_TYPE_MNT:
fanotify_free_mnt_event(event);
break;
default:
WARN_ON_ONCE(1);
}
Expand Down
18 changes: 18 additions & 0 deletions fs/notify/fanotify/fanotify.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache;
extern struct kmem_cache *fanotify_fid_event_cachep;
extern struct kmem_cache *fanotify_path_event_cachep;
extern struct kmem_cache *fanotify_perm_event_cachep;
extern struct kmem_cache *fanotify_mnt_event_cachep;

/* Possible states of the permission event */
enum {
Expand Down Expand Up @@ -244,6 +245,7 @@ enum fanotify_event_type {
FANOTIFY_EVENT_TYPE_PATH_PERM,
FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
FANOTIFY_EVENT_TYPE_MNT,
__FANOTIFY_EVENT_TYPE_NUM
};

Expand Down Expand Up @@ -409,12 +411,23 @@ struct fanotify_path_event {
struct path path;
};

/* fanotify event that carries a mount ID (see FANOTIFY_EVENT_TYPE_MNT) */
struct fanotify_mnt_event {
struct fanotify_event fae; /* embedded base event; FANOTIFY_ME() maps back from it */
u64 mnt_id; /* mount ID this event refers to */
};

/* Get the fanotify_path_event that embeds @event as its fae member. */
static inline struct fanotify_path_event *
FANOTIFY_PE(struct fanotify_event *event)
{
	struct fanotify_path_event *pevent;

	pevent = container_of(event, struct fanotify_path_event, fae);
	return pevent;
}

/* Get the fanotify_mnt_event that embeds @event as its fae member. */
static inline struct fanotify_mnt_event *
FANOTIFY_ME(struct fanotify_event *event)
{
	struct fanotify_mnt_event *mevent;

	mevent = container_of(event, struct fanotify_mnt_event, fae);
	return mevent;
}

/*
* Structure for permission fanotify events. It gets allocated and freed in
* fanotify_handle_event() since we wait there for user response. When the
Expand Down Expand Up @@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask)
return mask & FAN_FS_ERROR;
}

/* True when @mask contains FAN_MNT_ATTACH and/or FAN_MNT_DETACH. */
static inline bool fanotify_is_mnt_event(u32 mask)
{
	const u32 mnt_events = FAN_MNT_ATTACH | FAN_MNT_DETACH;

	return (mask & mnt_events) != 0;
}

static inline const struct path *fanotify_event_path(struct fanotify_event *event)
{
if (event->type == FANOTIFY_EVENT_TYPE_PATH)
Expand Down
Loading

0 comments on commit 2cc0b7f

Please sign in to comment.