From 784ed4354c9077501b3a9236bafea23e5b878703 Mon Sep 17 00:00:00 2001 From: Christian Brauner <brauner@kernel.org> Date: Tue, 4 Feb 2025 12:27:46 +0100 Subject: [PATCH 1/4] uidgid: add map_id_range_up() Add map_id_range_up() to verify that the full kernel id range can be mapped up in a given idmapping. This will be used in follow-up patches. Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-1-007720f39f2e@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org> --- include/linux/uidgid.h | 6 ++++++ kernel/user_namespace.c | 26 +++++++++++++++++--------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index f85ec5613721f..2dc767e08f544 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) u32 map_id_down(struct uid_gid_map *map, u32 id); u32 map_id_up(struct uid_gid_map *map, u32 id); +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count); #else @@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id) return id; } +static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) +{ + return id; +} + static inline u32 map_id_up(struct uid_gid_map *map, u32 id) { return id; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index aa0b2e47f2f21..682f40d5632d4 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -238,7 +238,7 @@ EXPORT_SYMBOL(__put_user_ns); struct idmap_key { bool map_up; /* true -> id from kid; false -> kid from id */ u32 id; /* id to find */ - u32 count; /* == 0 unless used with map_id_range_down() */ + u32 count; }; /* @@ -343,16 +343,19 @@ u32 map_id_down(struct uid_gid_map *map, u32 id) * UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * -map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) +map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { unsigned idx; - u32 first, last; + u32 first, last, id2; + + id2 = id + count - 1; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; - if (id >= first && id <= last) + if (id >= first && id <= last && + (id2 >= first && id2 <= last)) return &map->extent[idx]; } return NULL; @@ -363,28 +366,28 @@ map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * -map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id) +map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { struct idmap_key key; key.map_up = true; - key.count = 1; + key.count = count; key.id = id; return bsearch(&key, map->reverse, extents, sizeof(struct uid_gid_extent), cmp_map_id); } -u32 map_id_up(struct uid_gid_map *map, u32 id) +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) - extent = map_id_up_base(extents, map, id); + extent = map_id_range_up_base(extents, map, id, count); else - extent = map_id_up_max(extents, map, id); + extent = map_id_range_up_max(extents, map, id, count); /* Map the id or note failure */ if (extent) @@ -395,6 +398,11 @@ u32 map_id_up(struct uid_gid_map *map, u32 id) return id; } +u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + return map_id_range_up(map, id, 1); +} + /** * make_kuid - Map a user-namespace uid pair into a kuid. * @ns: User namespace that the uid is in From 37c4a9590e1efcae7749682239fc22a330d2d325 Mon Sep 17 00:00:00 2001 From: Christian Brauner <brauner@kernel.org> Date: Tue, 4 Feb 2025 12:27:47 +0100 Subject: [PATCH 2/4] statmount: allow to retrieve idmappings This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options. It allows the retrieval of idmappings via statmount(). Currently it isn't possible to figure out what idmappings are applied to an idmapped mount. This information is often crucial. Before statmount() the only realistic options for an interface like this would have been to add it to /proc/<pid>/fdinfo/<nr> or to expose it in /proc/<pid>/mountinfo. Both solution would have been pretty ugly and would've shown information that is of strong interest to some application but not all. statmount() is perfect for this. The idmappings applied to an idmapped mount are shown relative to the caller's user namespace. This is the most useful solution that doesn't risk leaking information or confuse the caller. For example, an idmapped mount might have been created with the following idmappings: mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt Listing the idmappings through statmount() in the same context shows: mnt_id: 2147485088 mnt_parent_id: 2147484816 fs_type: btrfs mnt_root: /srv mnt_point: /opt mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ mnt_uidmap[0]: 0 10000 1000 mnt_uidmap[1]: 2000 2000 1 mnt_uidmap[2]: 3000 3000 1 mnt_gidmap[0]: 0 10000 1000 mnt_gidmap[1]: 2000 2000 1 mnt_gidmap[2]: 3000 3000 1 But the idmappings might not always be resolvable in the caller's user namespace. For example: unshare --user --map-root In this case statmount() will skip any mappings that fil to resolve in the caller's idmapping: mnt_id: 2147485087 mnt_parent_id: 2147484016 fs_type: btrfs mnt_root: /srv mnt_point: /opt mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ The caller can differentiate between a mount not being idmapped and a mount that is idmapped but where all mappings fail to resolve in the caller's idmapping by check for the STATMOUNT_MNT_{G,U}IDMAP flag being raised but the number of mappings in ->mnt_{g,u}idmap_num being zero. Note that statmount() requires that the whole range must be resolvable in the caller's user namespace. If a subrange fails to map it will still list the map as not resolvable. This is a practical compromise to avoid having to find which subranges are resovable and wich aren't. Idmappings are listed as a string array with each mapping separated by zero bytes. This allows to retrieve the idmappings and immediately use them for writing to e.g., /proc/<pid>/{g,u}id_map and it also allow for simple iteration like: if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { const char *idmap = stmnt->str + stmnt->mnt_uidmap; for (size_t idx = 0; idx < stmnt->mnt_uidmap_nr; idx++) { printf("mnt_uidmap[%lu]: %s\n", idx, idmap); idmap += strlen(idmap) + 1; } } Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-2-007720f39f2e@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org> --- fs/internal.h | 1 + fs/mnt_idmapping.c | 51 ++++++++++++++++++++++++++++++ fs/namespace.c | 59 ++++++++++++++++++++++++++++++++++- include/linux/mnt_idmapping.h | 5 +++ include/uapi/linux/mount.h | 8 ++++- 5 files changed, 122 insertions(+), 2 deletions(-) diff --git a/fs/internal.h b/fs/internal.h index e7f02ae1e0981..db6094d5cb0b6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path) return path->mnt->mnt_root == path->dentry; } void file_f_owner_release(struct file *file); +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 7b1df8cc2821d..a37991fdb194d 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -6,6 +6,7 @@ #include <linux/mnt_idmapping.h> #include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/seq_file.h> #include "internal.h" @@ -334,3 +335,53 @@ void mnt_idmap_put(struct mnt_idmap *idmap) free_mnt_idmap(idmap); } EXPORT_SYMBOL_GPL(mnt_idmap_put); + +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) +{ + struct uid_gid_map *map, *map_up; + u32 idx, nr_mappings; + + if (!is_valid_mnt_idmap(idmap)) + return 0; + + /* + * Idmappings are shown relative to the caller's idmapping. + * This is both the most intuitive and most useful solution. + */ + if (uid_map) { + map = &idmap->uid_map; + map_up = ¤t_user_ns()->uid_map; + } else { + map = &idmap->gid_map; + map_up = ¤t_user_ns()->gid_map; + } + + for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) { + uid_t lower; + struct uid_gid_extent *extent; + + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent = &map->extent[idx]; + else + extent = &map->forward[idx]; + + /* + * Verify that the whole range of the mapping can be + * resolved in the caller's idmapping. If it cannot be + * resolved skip the mapping. + */ + lower = map_id_range_up(map_up, extent->lower_first, extent->count); + if (lower == (uid_t) -1) + continue; + + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); + + seq->count++; /* mappings are separated by \0 */ + if (seq_has_overflowed(seq)) + return -EAGAIN; + + nr_mappings++; + } + + return nr_mappings; +} diff --git a/fs/namespace.c b/fs/namespace.c index d470a369d42bc..967ee2a5bacf3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5008,6 +5008,7 @@ struct kstatmount { struct statmount __user *buf; size_t bufsize; struct vfsmount *mnt; + struct mnt_idmap *idmap; u64 mask; struct path root; struct statmount sm; @@ -5278,6 +5279,46 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq) return 0; } +static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, true); + if (ret < 0) + return ret; + + s->sm.mnt_uidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. + */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_UIDMAP; + return 0; +} + +static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, false); + if (ret < 0) + return ret; + + s->sm.mnt_gidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. + */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_GIDMAP; + return 0; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret = 0; @@ -5319,6 +5360,14 @@ static int statmount_string(struct kstatmount *s, u64 flag) sm->sb_source = start; ret = statmount_sb_source(s, seq); break; + case STATMOUNT_MNT_UIDMAP: + sm->mnt_uidmap = start; + ret = statmount_mnt_uidmap(s, seq); + break; + case STATMOUNT_MNT_GIDMAP: + sm->mnt_gidmap = start; + ret = statmount_mnt_gidmap(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; @@ -5443,6 +5492,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, return err; s->root = root; + s->idmap = mnt_idmap(s->mnt); if (s->mask & STATMOUNT_SB_BASIC) statmount_sb_basic(s); @@ -5476,6 +5526,12 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, if (!err && s->mask & STATMOUNT_SB_SOURCE) err = statmount_string(s, STATMOUNT_SB_SOURCE); + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); + + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) statmount_mnt_ns_id(s, ns); @@ -5499,7 +5555,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index b1b219bc34224..e71a6070a8f8c 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) +{ + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; +} + #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index c07008816acae..0be6ac4c16243 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -179,7 +179,11 @@ struct statmount { __u32 opt_array; /* [str] Array of nul terminated fs options */ __u32 opt_sec_num; /* Number of security options */ __u32 opt_sec_array; /* [str] Array of nul terminated security options */ - __u64 __spare2[46]; + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[44]; char str[]; /* Variable size part containing strings */ }; @@ -217,6 +221,8 @@ struct mnt_id_req { #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_MNT_UIDMAP 0x00001000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00002000U /* Want/got gidmap... */ /* * Special @mnt_id values that can be passed to listmount From a496dfecbc47253012f1e10d01110428bb9dc118 Mon Sep 17 00:00:00 2001 From: Christian Brauner <brauner@kernel.org> Date: Tue, 4 Feb 2025 12:27:48 +0100 Subject: [PATCH 3/4] samples/vfs: check whether flag was raised For string options the kernel will raise the corresponding flag only if the string isn't empty. So check for the flag. Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-3-007720f39f2e@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org> --- samples/vfs/test-list-all-mounts.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c index 1a02ea4593e39..ce272ded8a79a 100644 --- a/samples/vfs/test-list-all-mounts.c +++ b/samples/vfs/test-list-all-mounts.c @@ -138,10 +138,11 @@ int main(int argc, char *argv[]) printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n", (uint64_t)stmnt->mnt_id, (uint64_t)stmnt->mnt_parent_id, - stmnt->str + stmnt->fs_type, - stmnt->str + stmnt->mnt_root, - stmnt->str + stmnt->mnt_point, - stmnt->str + stmnt->mnt_opts); + (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", + (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "", + (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", + (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); + free(stmnt); } } From fa204a65f1b65664eb938940c73cdfc0dcfd578e Mon Sep 17 00:00:00 2001 From: Christian Brauner <brauner@kernel.org> Date: Tue, 4 Feb 2025 12:27:49 +0100 Subject: [PATCH 4/4] samples/vfs: add STATMOUNT_MNT_{G,U}IDMAP Illustrate how to use STATMOUNT_MNT_{G,U}IDMAP. Link: https://lore.kernel.org/r/20250204-work-mnt_idmap-statmount-v2-4-007720f39f2e@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org> --- samples/vfs/samples-vfs.h | 14 +++++++++++++- samples/vfs/test-list-all-mounts.c | 26 ++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h index 103e1e7c4cec2..498baf581b563 100644 --- a/samples/vfs/samples-vfs.h +++ b/samples/vfs/samples-vfs.h @@ -42,7 +42,11 @@ struct statmount { __u32 opt_array; /* [str] Array of nul terminated fs options */ __u32 opt_sec_num; /* Number of security options */ __u32 opt_sec_array; /* [str] Array of nul terminated security options */ - __u64 __spare2[46]; + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings */ + __u64 __spare2[44]; char str[]; /* Variable size part containing strings */ }; @@ -158,6 +162,14 @@ struct mnt_ns_info { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #endif +#ifndef STATMOUNT_MNT_UIDMAP +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#endif + +#ifndef STATMOUNT_MNT_GIDMAP +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ +#endif + #ifndef MOUNT_ATTR_RDONLY #define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ #endif diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c index ce272ded8a79a..bb3b83d8f1d7c 100644 --- a/samples/vfs/test-list-all-mounts.c +++ b/samples/vfs/test-list-all-mounts.c @@ -128,14 +128,16 @@ int main(int argc, char *argv[]) STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID | STATMOUNT_MNT_OPTS | - STATMOUNT_FS_TYPE, 0); + STATMOUNT_FS_TYPE | + STATMOUNT_MNT_UIDMAP | + STATMOUNT_MNT_GIDMAP, 0); if (!stmnt) { printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n", (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id); continue; } - printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n", + printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n", (uint64_t)stmnt->mnt_id, (uint64_t)stmnt->mnt_parent_id, (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", @@ -143,6 +145,26 @@ int main(int argc, char *argv[]) (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); + if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_uidmap; + + for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { + printf("mnt_uidmap[%lu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + if (stmnt->mask & STATMOUNT_MNT_GIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_gidmap; + + for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) { + printf("mnt_gidmap[%lu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + printf("\n"); + free(stmnt); } }