From 4f911138c8da94bcff84f1d093d28e378703c43f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:16 +0300 Subject: [PATCH 01/14] fs: add generic helper for filling statx attribute flags The immutable and append-only properties on an inode are published on the inode's i_flags and enforced by the VFS. Create a helper to fill the corresponding STATX_ATTR_ flags in the kstat structure from the inode's i_flags. Only orangefs was converted to use this helper. Other filesystems could use it in the future. Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/orangefs/inode.c | 7 +------ fs/stat.c | 18 ++++++++++++++++++ include/linux/fs.h | 1 + include/linux/stat.h | 4 ++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 16ac617df7d7f..c1bb4c4b5d672 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -882,12 +882,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, if (!(request_mask & STATX_SIZE)) stat->result_mask &= ~STATX_SIZE; - stat->attributes_mask = STATX_ATTR_IMMUTABLE | - STATX_ATTR_APPEND; - if (inode->i_flags & S_IMMUTABLE) - stat->attributes |= STATX_ATTR_IMMUTABLE; - if (inode->i_flags & S_APPEND) - stat->attributes |= STATX_ATTR_APPEND; + generic_fill_statx_attr(inode, stat); } return ret; } diff --git a/fs/stat.c b/fs/stat.c index 1fa38bdec1a68..28d2020ba1f42 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -59,6 +59,24 @@ void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode, } EXPORT_SYMBOL(generic_fillattr); +/** + * generic_fill_statx_attr - Fill in the statx attributes from the inode flags + * @inode: Inode to use as the source + * @stat: Where to fill in the attribute flags + * + * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the + * inode that are published on i_flags and enforced by the VFS. 
+ */ +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) +{ + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; +} +EXPORT_SYMBOL(generic_fill_statx_attr); + /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216c..ae6c6c34db944 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3355,6 +3355,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *); +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index fff27e6038141..7df06931f25d8 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -34,6 +34,10 @@ struct kstat { STATX_ATTR_ENCRYPTED | \ STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ +#define KSTAT_ATTR_VFS_FLAGS \ + (STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND \ + ) /* Attrs corresponding to S_* flags that are enforced by the VFS */ u64 ino; dev_t dev; dev_t rdev; From a0c236b11706cc223252ad97e80871a18d9ee812 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:17 +0300 Subject: [PATCH 02/14] ovl: pass ovl_fs to ovl_check_setxattr() Instead of passing the overlay dentry. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 10 +++++----- fs/overlayfs/dir.c | 6 ++++-- fs/overlayfs/namei.c | 2 +- fs/overlayfs/overlayfs.h | 6 +++--- fs/overlayfs/util.c | 7 +++---- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2846b943e80c1..3fa68a5cc16ea 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -331,8 +331,8 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper) +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; @@ -351,7 +351,7 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, /* * Do not fail when upper doesn't support xattrs. */ - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf, + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); kfree(fh); @@ -526,13 +526,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. 
*/ if (c->origin) { - err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp); + err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } if (c->metacopy) { - err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY, + err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY, NULL, 0, -EOPNOTSUPP); if (err) return err; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93efe7048a771..258434567a343 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -233,9 +233,10 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr) static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, int xerr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); @@ -1043,6 +1044,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir) static int ovl_set_redirect(struct dentry *dentry, bool samedir) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); const char *redirect = ovl_dentry_get_redirect(dentry); bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir); @@ -1053,7 +1055,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, redirect, strlen(redirect), -EXDEV); if (!err) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 210cd6f66e28e..da063b18b4195 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -811,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, if (err) return err; - err = ovl_set_origin(ofs, dentry, lower, upper); + err = ovl_set_origin(ofs, lower, upper); if (!err) err = ovl_set_impure(dentry->d_parent, 
upper->d_parent); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6ec73db4bf9e6..e5dabf7ef339a 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -320,7 +320,7 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags); bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry); bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, enum ovl_xattr ox); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); @@ -561,8 +561,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper); +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b9d03627f364c..81b8f135445a8 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -600,12 +600,11 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), }; -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr) { int err; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; if (ofs->noxattr) return xerr; @@ -623,6 +622,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; if 
(ovl_test_flag(OVL_IMPURE, d_inode(dentry))) @@ -632,8 +632,7 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) * Do not fail when upper doesn't support xattrs. * Upper inodes won't have origin nor redirect xattr anyway. */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); + err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0); if (!err) ovl_set_flag(OVL_IMPURE, d_inode(dentry)); From 72db82115d2bdfbfba8b15a92d91872cfe1b40c6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:18 +0300 Subject: [PATCH 03/14] ovl: copy up sync/noatime fileattr flags When a lower file has sync/noatime fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_SYNC/S_NOATIME inode flags copied from lower inode, so vfs code still treats the ovl inode as sync/noatime. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. To fix this inconsistency, try to copy the fileattr flags on copy up if the upper fs supports the fileattr_set() method. This gives consistent behavior post copy up regardless of inode eviction from cache. We cannot copy up the immutable/append-only inode flags in a similar manner, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Those flags will be addressed by a followup patch. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 51 ++++++++++++++++++++++++++++++++++------ fs/overlayfs/inode.c | 44 ++++++++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 15 +++++++++++- 3 files changed, 89 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 3fa68a5cc16ea..daf2afa603d31 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,31 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return error; } +static int ovl_copy_fileattr(struct path *old, struct path *new) +{ + struct fileattr oldfa = { .flags_valid = true }; + struct fileattr newfa = { .flags_valid = true }; + int err; + + err = ovl_real_fileattr_get(old, &oldfa); + if (err) + return err; + + err = ovl_real_fileattr_get(new, &newfa); + if (err) + return err; + + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); + newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; + newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); + + BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK; + newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK); + + return ovl_real_fileattr_set(new, &newfa); +} + static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old, struct path *new, loff_t len) { @@ -493,20 +519,21 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct inode *inode = d_inode(c->dentry); + struct path upperpath, datapath; int err; + ovl_path_upper(c->dentry, &upperpath); + if (WARN_ON(upperpath.dentry != NULL)) + return -EIO; + + upperpath.dentry = temp; + /* * Copy up data first and then xattrs. Writing data after * xattrs will remove security.capability xattr automatically. 
*/ if (S_ISREG(c->stat.mode) && !c->metacopy) { - struct path upperpath, datapath; - - ovl_path_upper(c->dentry, &upperpath); - if (WARN_ON(upperpath.dentry != NULL)) - return -EIO; - upperpath.dentry = temp; - ovl_path_lowerdata(c->dentry, &datapath); err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); @@ -518,6 +545,16 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) if (err) return err; + if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) { + /* + * Copy the fileattr inode flags that are the source of already + * copied i_flags + */ + err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + if (err) + return err; + } + /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 5e828a1c98a8c..b288843e6b422 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -503,16 +503,14 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Introducing security_inode_fileattr_get/set() hooks would solve this issue * properly. 
*/ -static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, +static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa, bool set) { - struct path realpath; struct file *file; unsigned int cmd; int err; - ovl_path_real(dentry, &realpath); - file = dentry_open(&realpath, O_RDONLY, current_cred()); + file = dentry_open(realpath, O_RDONLY, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); @@ -527,11 +525,22 @@ static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, return err; } +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, true); + if (err) + return err; + + return vfs_fileattr_set(&init_user_ns, realpath->dentry, fa); +} + int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *upperdentry; + struct path upperpath; const struct cred *old_cred; int err; @@ -541,12 +550,10 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, err = ovl_copy_up(dentry); if (!err) { - upperdentry = ovl_dentry_upper(dentry); + ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, true); - if (!err) - err = vfs_fileattr_set(&init_user_ns, upperdentry, fa); + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); ovl_copyflags(ovl_inode_real(inode), inode); } @@ -555,17 +562,28 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, return err; } +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, false); + if (err) + return err; + + return vfs_fileattr_get(realpath->dentry, fa); +} + int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *realdentry = ovl_dentry_real(dentry); + struct path realpath; const 
struct cred *old_cred; int err; + ovl_path_real(dentry, &realpath); + old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, false); - if (!err) - err = vfs_fileattr_get(realdentry, fa); + err = ovl_real_fileattr_get(&realpath, fa); revert_creds(old_cred); return err; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e5dabf7ef339a..2cbebe06d9adb 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -518,9 +518,20 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) i_size_write(to, i_size_read(from)); } +/* vfs inode flags copied from real to ovl inode */ +#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) + +/* + * fileattr flags copied from lower to upper inode on copy up. + * We cannot copy immutable/append-only flags, because that would prevevnt + * linking temp inode to upper dir. + */ +#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) +#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) + static inline void ovl_copyflags(struct inode *from, struct inode *to) { - unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; + unsigned int mask = OVL_COPY_I_FLAGS_MASK; inode_set_flags(to, from->i_flags & mask, mask); } @@ -548,6 +559,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); extern const struct file_operations ovl_file_operations; int __init ovl_aio_request_cache_init(void); void ovl_aio_request_cache_destroy(void); +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa); +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); From 096a218a588d78d699adcacb6919cff4718c4cac Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:19 +0300 Subject: [PATCH 04/14] ovl: 
consistent behavior for immutable/append-only inodes When a lower file has immutable/append-only fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_IMMUTABLE/S_APPEND inode flags copied from lower inode, so vfs code still treats the ovl inode as immutable/append-only. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. We cannot copy up the immutable and append-only fileattr flags, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Instead, if any of the fileattr flags of interest exist on the lower inode, we store them in overlay.protattr xattr on the upper inode and we read the flags from xattr on lookup and on fileattr_get(). This gives consistent behavior post copy up regardless of inode eviction from cache. When user sets new fileattr flags, we update or remove the overlay.protattr xattr. Storing immutable/append-only fileattr flags in an xattr instead of upper fileattr also solves other non-standard behavior issues - overlayfs can now copy up children of "ovl-immutable" directories and lower aliases of "ovl-immutable" hardlinks. 
Reported-by: Chengguang Xu Link: https://lore.kernel.org/linux-unionfs/20201226104618.239739-1-cgxu519@mykernel.net/ Link: https://lore.kernel.org/linux-unionfs/20210210190334.1212210-5-amir73il@gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 17 +++++++- fs/overlayfs/inode.c | 50 +++++++++++++++++++++-- fs/overlayfs/overlayfs.h | 13 +++++- fs/overlayfs/util.c | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index daf2afa603d31..9d8ebf0e72375 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -131,7 +131,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return error; } -static int ovl_copy_fileattr(struct path *old, struct path *new) +static int ovl_copy_fileattr(struct inode *inode, struct path *old, + struct path *new) { struct fileattr oldfa = { .flags_valid = true }; struct fileattr newfa = { .flags_valid = true }; @@ -145,6 +146,18 @@ static int ovl_copy_fileattr(struct path *old, struct path *new) if (err) return err; + /* + * We cannot set immutable and append-only flags on upper inode, + * because we would not be able to link upper inode to upper dir + * not set overlay private xattr on upper inode. + * Store these flags in overlay.protattr xattr instead. 
+ */ + if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { + err = ovl_set_protattr(inode, new->dentry, &oldfa); + if (err) + return err; + } + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); @@ -550,7 +563,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * Copy the fileattr inode flags that are the source of already * copied i_flags */ - err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath); if (err) return err; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b288843e6b422..37300e972a398 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -162,7 +162,8 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, enum ovl_path_type type; struct path realpath; const struct cred *old_cred; - bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + struct inode *inode = d_inode(dentry); + bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; int err; bool metacopy_blocks = false; @@ -175,6 +176,9 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, if (err) goto out; + /* Report the effective immutable/append-only STATX flags */ + generic_fill_statx_attr(inode, stat); + /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. 
@@ -542,6 +546,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, struct inode *inode = d_inode(dentry); struct path upperpath; const struct cred *old_cred; + unsigned int flags; int err; err = ovl_want_write(dentry); @@ -553,15 +558,49 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_real_fileattr_set(&upperpath, fa); + /* + * Store immutable/append-only flags in xattr and clear them + * in upper fileattr (in case they were set by older kernel) + * so children of "ovl-immutable" directories lower aliases of + * "ovl-immutable" hardlinks could be copied up. + * Clear xattr when flags are cleared. + */ + err = ovl_set_protattr(inode, upperpath.dentry, fa); + if (!err) + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); - ovl_copyflags(ovl_inode_real(inode), inode); + + /* + * Merge real inode flags with inode flags read from + * overlay.protattr xattr + */ + flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; + + BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); + flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; + inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); } ovl_drop_write(dentry); out: return err; } +/* Convert inode protection flags to fileattr flags */ +static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) +{ + BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); + BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + + if (inode->i_flags & S_APPEND) { + fa->flags |= FS_APPEND_FL; + fa->fsx_xflags |= FS_XFLAG_APPEND; + } + if (inode->i_flags & S_IMMUTABLE) { + fa->flags |= FS_IMMUTABLE_FL; + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + } +} + int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) { int err; @@ -584,6 +623,7 @@ int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) old_cred = ovl_override_creds(inode->i_sb); err = 
ovl_real_fileattr_get(&realpath, fa); + ovl_fileattr_prot_flags(inode, fa); revert_creds(old_cred); return err; @@ -1136,6 +1176,10 @@ struct inode *ovl_get_inode(struct super_block *sb, } } + /* Check for immutable/append-only inode flags in xattr */ + if (upperdentry) + ovl_check_protattr(inode, upperdentry); + if (inode->i_state & I_NEW) unlock_new_inode(inode); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2cbebe06d9adb..2433cc030c870 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -34,6 +34,7 @@ enum ovl_xattr { OVL_XATTR_NLINK, OVL_XATTR_UPPER, OVL_XATTR_METACOPY, + OVL_XATTR_PROTATTR, }; enum ovl_inode_flag { @@ -520,14 +521,22 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* vfs inode flags copied from real to ovl inode */ #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) +/* vfs inode flags read from overlay.protattr xattr to ovl inode */ +#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) /* * fileattr flags copied from lower to upper inode on copy up. - * We cannot copy immutable/append-only flags, because that would prevevnt - * linking temp inode to upper dir. + * We cannot copy up immutable/append-only flags, because that would prevent + * linking temp inode to upper dir, so we store them in xattr instead. 
*/ #define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) #define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) +#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL) +#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE) + +void ovl_check_protattr(struct inode *inode, struct dentry *upper); +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 81b8f135445a8..f48284a2a8960 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -585,6 +586,7 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, #define OVL_XATTR_NLINK_POSTFIX "nlink" #define OVL_XATTR_UPPER_POSTFIX "upper" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" +#define OVL_XATTR_PROTATTR_POSTFIX "protattr" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -598,6 +600,7 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK), OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -639,6 +642,88 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) return err; } + +#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */ + +void ovl_check_protattr(struct inode *inode, struct dentry *upper) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK; + char buf[OVL_PROTATTR_MAX+1]; + int res, n; + + res = ovl_do_getxattr(ofs, upper, OVL_XATTR_PROTATTR, buf, + OVL_PROTATTR_MAX); + if (res < 0) + return; + + /* + * Initialize inode flags from overlay.protattr xattr and upper inode + * flags. 
If upper inode has those fileattr flags set (i.e. from old + * kernel), we do not clear them on ovl_get_inode(), but we will clear + * them on next fileattr_set(). + */ + for (n = 0; n < res; n++) { + if (buf[n] == 'a') + iflags |= S_APPEND; + else if (buf[n] == 'i') + iflags |= S_IMMUTABLE; + else + break; + } + + if (!res || n < res) { + pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n", + upper, res); + } else { + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + } +} + +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + char buf[OVL_PROTATTR_MAX]; + int len = 0, err = 0; + u32 iflags = 0; + + BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX); + + if (fa->flags & FS_APPEND_FL) { + buf[len++] = 'a'; + iflags |= S_APPEND; + } + if (fa->flags & FS_IMMUTABLE_FL) { + buf[len++] = 'i'; + iflags |= S_IMMUTABLE; + } + + /* + * Do not allow to set protection flags when upper doesn't support + * xattrs, because we do not set those fileattr flags on upper inode. + * Remove xattr if it exist and all protection flags are cleared. + */ + if (len) { + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR, + buf, len, -EPERM); + } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) { + err = ovl_do_removexattr(ofs, upper, OVL_XATTR_PROTATTR); + if (err == -EOPNOTSUPP || err == -ENODATA) + err = 0; + } + if (err) + return err; + + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + + /* Mask out the fileattr flags that should not be set in upper inode */ + fa->flags &= ~OVL_PROT_FS_FLAGS_MASK; + fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK; + + return 0; +} + /** * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. 
From e4522bc8733dce1cb4443f1d506869781ee9caa8 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:45 +0200 Subject: [PATCH 05/14] ovl: disable decoding null uuid with redirect_dir Currently decoding origin with lower null uuid is not allowed unless user opted-in to one of the new features that require following the lower inode of non-dir upper (index, xino, metacopy). Now we add redirect_dir too to that feature list. Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b01d4147520d3..97ea35fdd933f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1600,7 +1600,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * lower inode of non-dir upper. */ if (!ofs->config.index && !ofs->config.metacopy && - ofs->config.xino != OVL_XINO_ON && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && uuid_is_null(uuid)) return false; From ca45275cd6b63e917eef681f9f15978b96afdbbe Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:46 +0200 Subject: [PATCH 06/14] ovl: add ovl_allow_offline_changes() helper Allows to check whether any of extended features are enabled Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 12 ++++++++++++ fs/overlayfs/super.c | 4 +--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2433cc030c870..e9b3e7880fc01 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -263,6 +263,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) +{ + /* + * To avoid regressions in existing setups 
with overlay lower offline + * changes, we allow lower changes only if none of the new features + * are used. + */ + return (!ofs->config.index && !ofs->config.metacopy && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON); +} + + /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 97ea35fdd933f..178daa5e82c9d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1599,9 +1599,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * user opted-in to one of the new features that require following the * lower inode of non-dir upper. */ - if (!ofs->config.index && !ofs->config.metacopy && - !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && - uuid_is_null(uuid)) + if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) return false; for (i = 0; i < ofs->numfs; i++) { From 1fc31aac96d7060ecee18124be6de18cb2268922 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:47 +0200 Subject: [PATCH 07/14] ovl: do not set overlay.opaque for new directories Enable optimizations only if user opted-in for any of extended features. If optimization is enabled, it breaks existing use case when a lower layer directory appears after directory was created on a merged layer. If overlay.opaque is applied, new files on lower layer are not visible. 
Consider the following scenario: - /lower and /upper are mounted to /merged - directory /merged/new-dir is created with a file test1 - overlay is unmounted - directory /lower/new-dir is created with a file test2 - overlay is mounted again If opaque is applied by default, file test2 is not going to be visible without explicitly clearing the overlay.opaque attribute. Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 258434567a343..9154222883e64 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -321,6 +321,7 @@ static bool ovl_type_origin(struct dentry *dentry) static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct ovl_cattr *attr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *udir = upperdir->d_inode; struct dentry *newdentry; @@ -339,7 +340,8 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) goto out_unlock; - if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && + !ovl_allow_offline_changes(ofs)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } From ffb24e3c657869b256c3f90792d262fe09f49628 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 27 Apr 2021 13:28:26 +0300 Subject: [PATCH 08/14] ovl: relax lookup error on mismatch origin ftype We get occasional reports of lookup errors due to mismatched origin ftype from users that re-format a lower squashfs image. Commit 13c6ad0f45fd ("ovl: document lower modification caveats") tries to discourage the practice of re-formatting lower layers and describes the expected behavior as undefined.
Commit b0e0f69731cd ("ovl: restrict lower null uuid for "xino=auto"") limits the configurations in which origin file handles are followed. In addition to these measures, change the behavior in case of detecting a mismatch origin ftype in lookup to issue a warning, not follow origin, but not fail the lookup operation either. That should make overall more users happy without any big consequences. Link: https://lore.kernel.org/linux-unionfs/CAOQ4uxgPq9E9xxwU2CDyHy-_yCZZeymg+3n+-6AqkGGE1YtwvQ@mail.gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index da063b18b4195..1a9b515fc45d4 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -392,7 +392,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); - return -EIO; + return -ESTALE; } static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, From b71759ef1e1730db81dab98e9dab9455e8c7f5a2 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 24 Apr 2021 22:03:15 +0800 Subject: [PATCH 09/14] ovl: skip checking lower file's i_writecount on truncate It is possible that a directory tree is shared between multiple overlay instances as a lower layer. In this case when one instance executes a file residing on the lower layer, the other instance denies a truncate(2) call on this file. This only happens for truncate(2) and not for open(2) with the O_TRUNC flag. Fix this interference and inconsistency by removing the preliminary i_writecount check before copy-up. This means that unlike on normal filesystems truncate(argv[0]) will now succeed. If this ever causes a regression in a real world use case this needs to be revisited. 
One way to fix this properly would be to keep a correct i_writecount in the overlay inode, but that is difficult due to memory mapping code only dealing with the real file/inode. Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.rst | 3 +++ fs/overlayfs/inode.c | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 455ca86eb4fce..7da6c30ed596a 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -427,6 +427,9 @@ b) If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping. +c) If a file residing on a lower layer is being executed, then opening that +file for write or truncating the file will not be denied with ETXTBSY. + The following options allow overlayfs to act more like a standards compliant filesystem: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 37300e972a398..8aa370e8143a7 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -33,12 +33,6 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; if (attr->ia_valid & ATTR_SIZE) { - struct inode *realinode = d_inode(ovl_dentry_real(dentry)); - - err = -ETXTBSY; - if (atomic_read(&realinode->i_writecount) < 0) - goto out_drop_write; - /* Truncate should trigger data copy up as well */ full_copy_up = true; } From d8991e8622e758b718e2e4291d31dd0bea4e14a4 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 10 Mar 2021 10:09:25 +0800 Subject: [PATCH 10/14] ovl: update ctime when changing fileattr Currently we keep the size, mode and times of the overlay inode the same as those of the upper inode, so ctime should be updated when changing file attributes as well.
Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 8aa370e8143a7..7d52e5ef2ac74 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -573,6 +573,9 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); + + /* Update ctime */ + ovl_copyattr(ovl_inode_real(inode), inode); } ovl_drop_write(dentry); out: From f945ca1963c8bd29471020d7c58c594ee7007006 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 22 Jul 2021 14:18:14 +0200 Subject: [PATCH 11/14] ovl: use kvalloc in xattr copy-up Extended attributes are usually small, but could be up to 64k in size, so use the most efficient method for doing the allocation. Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9d8ebf0e72375..4e7d5bfa2949f 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -63,7 +63,7 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return list_size; } - buf = kzalloc(list_size, GFP_KERNEL); + buf = kvzalloc(list_size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -106,11 +106,12 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, if (size > value_size) { void *new; - new = krealloc(value, size, GFP_KERNEL); + new = kvmalloc(size, GFP_KERNEL); if (!new) { error = -ENOMEM; break; } + kvfree(value); value = new; value_size = size; goto retry; @@ -125,9 +126,9 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, error = 0; } } - kfree(value); + kvfree(value); out: - kfree(buf); + kvfree(buf); return error; } From 52d5a0c6bd8a89f460243ed937856354f8f253a3 Mon Sep 17 00:00:00 2001 From: chenying Date: Mon, 
16 Aug 2021 18:02:56 +0800 Subject: [PATCH 12/14] ovl: fix BUG_ON() in may_delete() when called from ovl_cleanup() If function ovl_instantiate() returns an error, ovl_cleanup() will be called and will try to remove newdentry from wdir, but the newdentry has been moved to udir at this time. This will cause BUG_ON(victim->d_parent->d_inode != dir) in fs/namei.c:may_delete(). Signed-off-by: chenying Fixes: 01b39dcc9568 ("ovl: use inode_insert5() to hash a newly created inode") Link: https://lore.kernel.org/linux-unionfs/e6496a94-a161-dc04-c38a-d2544633acb4@bytedance.com/ Cc: # v4.18 Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 9154222883e64..1fefb2b8960e9 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -545,8 +545,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_cleanup; } err = ovl_instantiate(dentry, inode, newdentry, hardlink); - if (err) - goto out_cleanup; + if (err) { + ovl_cleanup(udir, newdentry); + dput(newdentry); + } out_dput: dput(upper); out_unlock: From 0cad6246621b5887d5b33fea84219d2a71f2f99a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Aug 2021 22:08:24 +0200 Subject: [PATCH 13/14] vfs: add rcu argument to ->get_acl() callback Add an rcu argument to the ->get_acl() callback to allow get_cached_acl_rcu() to call the ->get_acl() method in the next patch.
Signed-off-by: Miklos Szeredi --- Documentation/filesystems/locking.rst | 2 +- Documentation/filesystems/vfs.rst | 2 +- fs/9p/acl.c | 5 ++++- fs/9p/acl.h | 2 +- fs/bad_inode.c | 2 +- fs/btrfs/acl.c | 5 ++++- fs/btrfs/ctree.h | 2 +- fs/ceph/acl.c | 5 ++++- fs/ceph/super.h | 2 +- fs/erofs/xattr.c | 5 ++++- fs/erofs/xattr.h | 2 +- fs/ext2/acl.c | 5 ++++- fs/ext2/acl.h | 2 +- fs/ext4/acl.c | 5 ++++- fs/ext4/acl.h | 2 +- fs/f2fs/acl.c | 5 ++++- fs/f2fs/acl.h | 2 +- fs/fuse/acl.c | 5 ++++- fs/fuse/fuse_i.h | 2 +- fs/gfs2/acl.c | 5 ++++- fs/gfs2/acl.h | 2 +- fs/jffs2/acl.c | 5 ++++- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 5 ++++- fs/jfs/jfs_acl.h | 2 +- fs/nfs/nfs3_fs.h | 2 +- fs/nfs/nfs3acl.c | 5 ++++- fs/ocfs2/acl.c | 5 ++++- fs/ocfs2/acl.h | 2 +- fs/orangefs/acl.c | 5 ++++- fs/orangefs/orangefs-kernel.h | 2 +- fs/overlayfs/inode.c | 5 ++++- fs/overlayfs/overlayfs.h | 2 +- fs/posix_acl.c | 2 +- fs/reiserfs/acl.h | 2 +- fs/reiserfs/xattr_acl.c | 5 ++++- fs/xfs/xfs_acl.c | 5 ++++- fs/xfs/xfs_acl.h | 4 ++-- include/linux/fs.h | 2 +- 39 files changed, 91 insertions(+), 40 deletions(-) diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 2183fd8cc3504..899fa9aba01a9 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -70,7 +70,7 @@ prototypes:: const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); - int (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 14c31eced416a..bf5c48066fac5 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ 
-432,7 +432,7 @@ As of kernel 2.6.22, the following members are defined: const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - int (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *); int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); diff --git a/fs/9p/acl.c b/fs/9p/acl.c index bb1b286c49ae4..c381499f54160 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -97,10 +97,13 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type) +struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu) { struct v9fs_session_info *v9ses; + if (rcu) + return ERR_PTR(-ECHILD); + v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { diff --git a/fs/9p/acl.h b/fs/9p/acl.h index e4f7e882272b7..d43c8949e807b 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -16,7 +16,7 @@ #ifdef CONFIG_9P_FS_POSIX_ACL extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); +extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu); extern int v9fs_acl_chmod(struct inode *, struct p9_fid *); extern int v9fs_set_create_acl(struct inode *, struct p9_fid *, struct posix_acl *, struct posix_acl *); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 48e16144c1f7b..12b8fdcc445bb 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -121,7 +121,7 @@ static const char *bad_inode_get_link(struct dentry *dentry, return ERR_PTR(-EIO); } -static struct posix_acl *bad_inode_get_acl(struct inode *inode, int 
type) +static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu) { return ERR_PTR(-EIO); } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index d95eb5c8cb373..3d00bb5deded3 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -16,13 +16,16 @@ #include "btrfs_inode.h" #include "xattr.h" -struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu) { int size; const char *name; char *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e5e53e592d4f9..ca5c7cb1b729b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3686,7 +3686,7 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -struct posix_acl *btrfs_get_acl(struct inode *inode, int type); +struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int btrfs_init_acl(struct btrfs_trans_handle *trans, diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 529af59d9fd30..f4fc8e0b847cc 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -29,7 +29,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, spin_unlock(&ci->i_ceph_lock); } -struct posix_acl *ceph_get_acl(struct inode *inode, int type) +struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) { int size; unsigned int retry_cnt = 0; @@ -37,6 +37,9 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) char *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9215a2f4535c8..b9512684e150c 100644 --- a/fs/ceph/super.h +++ 
b/fs/ceph/super.h @@ -1087,7 +1087,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx); /* acl.c */ #ifdef CONFIG_CEPH_FS_POSIX_ACL -struct posix_acl *ceph_get_acl(struct inode *, int); +struct posix_acl *ceph_get_acl(struct inode *, int, bool); int ceph_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int ceph_pre_init_acls(struct inode *dir, umode_t *mode, diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 8dd54b420a1d6..778f2c52295d1 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -673,12 +673,15 @@ ssize_t erofs_listxattr(struct dentry *dentry, } #ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type) +struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; int prefix, rc; char *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 366dcb400525f..94090c74b3f7a 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -80,7 +80,7 @@ static inline int erofs_getxattr(struct inode *inode, int index, #endif /* !CONFIG_EROFS_FS_XATTR */ #ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type); +struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu); #else #define erofs_get_acl (NULL) #endif diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index b9a9db98e94b9..bf298967c5b81 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -141,13 +141,16 @@ ext2_acl_to_disk(const struct posix_acl *acl, size_t *size) * inode->i_mutex: don't care */ struct posix_acl * -ext2_get_acl(struct inode *inode, int type) +ext2_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name_index = 
EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 917db5f6630a7..925ab6287d35a 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -55,7 +55,7 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern struct posix_acl *ext2_get_acl(struct inode *inode, int type); +extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu); extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index c5eaffccecc33..0613dfcbfd4aa 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -142,13 +142,16 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) * inode->i_mutex: don't care */ struct posix_acl * -ext4_get_acl(struct inode *inode, int type) +ext4_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 84b8942a57f23..3219669732bf4 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -55,7 +55,7 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -struct posix_acl *ext4_get_acl(struct inode *inode, int type); +struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu); int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 239ad9453b998..16e826e01f095 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -196,8 +196,11 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, return acl; } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +struct 
posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) { + if (rcu) + return ERR_PTR(-ECHILD); + return __f2fs_get_acl(inode, type, NULL); } diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 986fd1bc780bb..a26e33cab4fff 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -33,7 +33,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *, int); +extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); extern int f2fs_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 52b165319be1f..337cb29a8dd5c 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -11,7 +11,7 @@ #include #include -struct posix_acl *fuse_get_acl(struct inode *inode, int type) +struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) { struct fuse_conn *fc = get_fuse_conn(inode); int size; @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (fuse_is_bad(inode)) return ERR_PTR(-EIO); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 07829ce78695b..f4140943311a2 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1216,7 +1216,7 @@ extern const struct xattr_handler *fuse_acl_xattr_handlers[]; extern const struct xattr_handler *fuse_no_acl_xattr_handlers[]; struct posix_acl; -struct posix_acl *fuse_get_acl(struct inode *inode, int type); +struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu); int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 9165d70ead077..734d1f05d8236 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -57,13 +57,16 @@ static struct posix_acl *__gfs2_get_acl(struct inode *inode, int type) return acl; } -struct 
posix_acl *gfs2_get_acl(struct inode *inode, int type) +struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; bool need_unlock = false; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { int ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index eccc6a43326c5..cd180ca7c9591 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -11,7 +11,7 @@ #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) -extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); +extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 55a79df70d24a..e945e34847880 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -173,12 +173,15 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -struct posix_acl *jffs2_get_acl(struct inode *inode, int type) +struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; char *value = NULL; int rc, xprefix; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 62c50da9d493b..9d9fb7cf093ef 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -27,7 +27,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -struct posix_acl *jffs2_get_acl(struct inode *inode, int type); +struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu); int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int 
jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 43c285c3d2a78..a653f34c6e263 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -14,13 +14,16 @@ #include "jfs_xattr.h" #include "jfs_acl.h" -struct posix_acl *jfs_get_acl(struct inode *inode, int type) +struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; char *ea_name; int size; char *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch(type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 7ae389a7a3666..3de40286d31f8 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -7,7 +7,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -struct posix_acl *jfs_get_acl(struct inode *inode, int type); +struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu); int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index c8a192802dda3..03a4e679fd995 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -11,7 +11,7 @@ * nfs3acl.c */ #ifdef CONFIG_NFS_V3_ACL -extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); +extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu); extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9ec560aa4a50b..93de0b58647a0 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -44,7 +44,7 @@ static void nfs3_abort_get_acl(struct posix_acl **p) cmpxchg(p, sentinel, ACL_NOT_CACHED); } -struct posix_acl *nfs3_get_acl(struct inode *inode, int type) +struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu) { struct nfs_server *server = 
NFS_SERVER(inode); struct page *pages[NFSACL_MAXPAGES] = { }; @@ -62,6 +62,9 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) }; int status, count; + if (rcu) + return ERR_PTR(-ECHILD); + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) return ERR_PTR(-EOPNOTSUPP); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 5c72a7e6d6c58..23a72a423955e 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -289,7 +289,7 @@ int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode, return status; } -struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu) { struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; @@ -297,6 +297,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) int had_lock; struct ocfs2_lock_holder oh; + if (rcu) + return ERR_PTR(-ECHILD); + osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return NULL; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index f59d8d0a61faf..95a57c888ab62 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -16,7 +16,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type); +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu); int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 18852b9ed82b7..605e5a3506ec2 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -10,12 +10,15 @@ #include "orangefs-bufmap.h" #include -struct posix_acl *orangefs_get_acl(struct inode *inode, int type) +struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; int ret; char *key = NULL, *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: key = 
XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 0e6b97682e41a..b5940ec1836a3 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -106,7 +106,7 @@ enum orangefs_vfs_op_states { extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; -extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type); +extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu); extern int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 7d52e5ef2ac74..ea335d3e55cfb 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -446,12 +446,15 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; } -struct posix_acl *ovl_get_acl(struct inode *inode, int type) +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) { struct inode *realinode = ovl_inode_real(inode); const struct cred *old_cred; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e9b3e7880fc01..3894f33479552 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -498,7 +498,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -struct posix_acl *ovl_get_acl(struct inode *inode, int type); +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu); int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); bool ovl_is_private_xattr(struct super_block 
*sb, const char *name); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f3309a7edb49d..4f1ef826e0404 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -138,7 +138,7 @@ struct posix_acl *get_acl(struct inode *inode, int type) set_cached_acl(inode, type, NULL); return NULL; } - acl = inode->i_op->get_acl(inode, type); + acl = inode->i_op->get_acl(inode, type, false); if (IS_ERR(acl)) { /* diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h index fd58618da3607..d9052b8ce6dd9 100644 --- a/fs/reiserfs/acl.h +++ b/fs/reiserfs/acl.h @@ -48,7 +48,7 @@ static inline int reiserfs_acl_count(size_t size) } #ifdef CONFIG_REISERFS_FS_POSIX_ACL -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu); int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int reiserfs_acl_chmod(struct inode *inode); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a9547144a0999..d6fcddc46f5b7 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -190,13 +190,16 @@ static void *reiserfs_posix_acl_to_disk(const struct posix_acl *acl, size_t * si * inode->i_mutex: down * BKL held [before 2.5.x] */ -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu) { char *name, *value; struct posix_acl *acl; int size; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index d02bef24b32b2..9e8ac9fa96669 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -125,7 +125,7 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) } struct posix_acl * -xfs_get_acl(struct inode *inode, int type) +xfs_get_acl(struct inode *inode, int type, bool rcu) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = 
ip->i_mount; @@ -137,6 +137,9 @@ xfs_get_acl(struct inode *inode, int type) }; int error; + if (rcu) + return ERR_PTR(-ECHILD); + trace_xfs_get_acl(ip); switch (type) { diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 7bdb3a4ed798f..bb6abdcb265da 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -10,13 +10,13 @@ struct inode; struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL -extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); +extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); #else -static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) +static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu) { return NULL; } diff --git a/include/linux/fs.h b/include/linux/fs.h index ae6c6c34db944..73376dfe28d01 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2065,7 +2065,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - struct posix_acl * (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); From 332f606b32b6291a944c8cf23b91f53a6e676525 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Aug 2021 22:08:24 +0200 Subject: [PATCH 14/14] ovl: enable RCU'd ->get_acl() Overlayfs does not cache ACL's (to avoid double caching). Instead it just calls the underlying filesystem's i_op->get_acl(), which will return the cached value, if possible. 
In RCU path walk, however, get_cached_acl_rcu() is employed to get the value from the cache, which will fail on overlayfs, resulting in dropping out of RCU walk mode. This can result in a big performance hit in certain situations. Fix by calling ->get_acl() with rcu=true in case of ACL_DONT_CACHE (which indicates pass-through). Reported-by: garyhuang Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 7 ++++--- fs/posix_acl.c | 13 ++++++++++++- include/linux/fs.h | 5 +++++ include/linux/posix_acl.h | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ea335d3e55cfb..832b17589733a 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "overlayfs.h" @@ -452,12 +453,12 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) const struct cred *old_cred; struct posix_acl *acl; - if (rcu) - return ERR_PTR(-ECHILD); - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; + if (rcu) + return get_cached_acl_rcu(realinode, type); + old_cred = ovl_override_creds(inode->i_sb); acl = get_acl(realinode, type); revert_creds(old_cred); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4f1ef826e0404..f5c25f580dd92 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -22,6 +22,7 @@ #include #include #include +#include static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -56,7 +57,17 @@ EXPORT_SYMBOL(get_cached_acl); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - return rcu_dereference(*acl_by_type(inode, type)); + struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); + + if (acl == ACL_DONT_CACHE) { + struct posix_acl *ret; + + ret = inode->i_op->get_acl(inode, type, LOOKUP_RCU); + if (!IS_ERR(ret)) + acl = ret; + } + + return acl; } EXPORT_SYMBOL(get_cached_acl_rcu); diff --git a/include/linux/fs.h b/include/linux/fs.h index
73376dfe28d01..c6e5bcbff0c02 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -581,6 +581,11 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +/* + * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to + * cache the ACL. This also means that ->get_acl() can be called in RCU mode + * with the LOOKUP_RCU flag. + */ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 307094ebb88c9..b65c877d92b8b 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -72,6 +72,8 @@ extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct user_namespace *, struct inode *, int, struct posix_acl *); +struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); + #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, @@ -84,7 +86,6 @@ extern int simple_set_acl(struct user_namespace *, struct inode *, extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); -struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode);