From 4f911138c8da94bcff84f1d093d28e378703c43f Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:16 +0300 Subject: [PATCH 01/14] fs: add generic helper for filling statx attribute flags The immutable and append-only properties on an inode are published on the inode's i_flags and enforced by the VFS. Create a helper to fill the corresponding STATX_ATTR_ flags in the kstat structure from the inode's i_flags. Only orangefs was converted to use this helper. Other filesystems could use it in the future. Suggested-by: Miklos Szeredi Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/orangefs/inode.c | 7 +------ fs/stat.c | 18 ++++++++++++++++++ include/linux/fs.h | 1 + include/linux/stat.h | 4 ++++ 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 16ac617df7d7..c1bb4c4b5d67 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -882,12 +882,7 @@ int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path, if (!(request_mask & STATX_SIZE)) stat->result_mask &= ~STATX_SIZE; - stat->attributes_mask = STATX_ATTR_IMMUTABLE | - STATX_ATTR_APPEND; - if (inode->i_flags & S_IMMUTABLE) - stat->attributes |= STATX_ATTR_IMMUTABLE; - if (inode->i_flags & S_APPEND) - stat->attributes |= STATX_ATTR_APPEND; + generic_fill_statx_attr(inode, stat); } return ret; } diff --git a/fs/stat.c b/fs/stat.c index 1fa38bdec1a6..28d2020ba1f4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -59,6 +59,24 @@ void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode, } EXPORT_SYMBOL(generic_fillattr); +/** + * generic_fill_statx_attr - Fill in the statx attributes from the inode flags + * @inode: Inode to use as the source + * @stat: Where to fill in the attribute flags + * + * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the + * inode that are published on i_flags and enforced by the VFS. 
+ */ +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) +{ + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; +} +EXPORT_SYMBOL(generic_fill_statx_attr); + /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..ae6c6c34db94 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3355,6 +3355,7 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *); +void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); diff --git a/include/linux/stat.h b/include/linux/stat.h index fff27e603814..7df06931f25d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -34,6 +34,10 @@ struct kstat { STATX_ATTR_ENCRYPTED | \ STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ +#define KSTAT_ATTR_VFS_FLAGS \ + (STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND \ + ) /* Attrs corresponding to S_* flags that are enforced by the VFS */ u64 ino; dev_t dev; dev_t rdev; From a0c236b11706cc223252ad97e80871a18d9ee812 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:17 +0300 Subject: [PATCH 02/14] ovl: pass ovl_fs to ovl_check_setxattr() Instead of passing the overlay dentry. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 10 +++++----- fs/overlayfs/dir.c | 6 ++++-- fs/overlayfs/namei.c | 2 +- fs/overlayfs/overlayfs.h | 6 +++--- fs/overlayfs/util.c | 7 +++---- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 2846b943e80c..3fa68a5cc16e 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -331,8 +331,8 @@ out_err: return ERR_PTR(err); } -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper) +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper) { const struct ovl_fh *fh = NULL; int err; @@ -351,7 +351,7 @@ int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, /* * Do not fail when upper doesn't support xattrs. */ - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf, + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0); kfree(fh); @@ -526,13 +526,13 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * hard link. 
*/ if (c->origin) { - err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp); + err = ovl_set_origin(ofs, c->lowerpath.dentry, temp); if (err) return err; } if (c->metacopy) { - err = ovl_check_setxattr(c->dentry, temp, OVL_XATTR_METACOPY, + err = ovl_check_setxattr(ofs, temp, OVL_XATTR_METACOPY, NULL, 0, -EOPNOTSUPP); if (err) return err; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 93efe7048a77..258434567a34 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -233,9 +233,10 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr) static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper, int xerr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; - err = ovl_check_setxattr(dentry, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr); if (!err) ovl_dentry_set_opaque(dentry); @@ -1043,6 +1044,7 @@ static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir) static int ovl_set_redirect(struct dentry *dentry, bool samedir) { int err; + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); const char *redirect = ovl_dentry_get_redirect(dentry); bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir); @@ -1053,7 +1055,7 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir) if (IS_ERR(redirect)) return PTR_ERR(redirect); - err = ovl_check_setxattr(dentry, ovl_dentry_upper(dentry), + err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry), OVL_XATTR_REDIRECT, redirect, strlen(redirect), -EXDEV); if (!err) { diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 210cd6f66e28..da063b18b419 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -811,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, if (err) return err; - err = ovl_set_origin(ofs, dentry, lower, upper); + err = ovl_set_origin(ofs, lower, upper); if (!err) err = ovl_set_impure(dentry->d_parent, 
upper->d_parent); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6ec73db4bf9e..e5dabf7ef339 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -320,7 +320,7 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags); bool ovl_check_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry); bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, enum ovl_xattr ox); -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr); int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry); @@ -561,8 +561,8 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, int ovl_set_attr(struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry, - struct dentry *lower, struct dentry *upper); +int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower, + struct dentry *upper); /* export.c */ extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index b9d03627f364..81b8f135445a 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -600,12 +600,11 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), }; -int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, +int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, enum ovl_xattr ox, const void *value, size_t size, int xerr) { int err; - struct ovl_fs *ofs = dentry->d_sb->s_fs_info; if (ofs->noxattr) return xerr; @@ -623,6 +622,7 @@ int ovl_check_setxattr(struct dentry *dentry, struct dentry *upperdentry, int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); int err; if 
(ovl_test_flag(OVL_IMPURE, d_inode(dentry))) @@ -632,8 +632,7 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) * Do not fail when upper doesn't support xattrs. * Upper inodes won't have origin nor redirect xattr anyway. */ - err = ovl_check_setxattr(dentry, upperdentry, OVL_XATTR_IMPURE, - "y", 1, 0); + err = ovl_check_setxattr(ofs, upperdentry, OVL_XATTR_IMPURE, "y", 1, 0); if (!err) ovl_set_flag(OVL_IMPURE, d_inode(dentry)); From 72db82115d2bdfbfba8b15a92d91872cfe1b40c6 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:18 +0300 Subject: [PATCH 03/14] ovl: copy up sync/noatime fileattr flags When a lower file has sync/noatime fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_SYNC/S_NOATIME inode flags copied from lower inode, so vfs code still treats the ovl inode as sync/noatime. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. To fix this inconsistency, try to copy the fileattr flags on copy up if the upper fs supports the fileattr_set() method. This gives consistent behavior post copy up regardless of inode eviction from cache. We cannot copy up the immutable/append-only inode flags in a similar manner, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Those flags will be addressed by a followup patch. 
Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 51 ++++++++++++++++++++++++++++++++++------ fs/overlayfs/inode.c | 44 ++++++++++++++++++++++++---------- fs/overlayfs/overlayfs.h | 15 +++++++++++- 3 files changed, 89 insertions(+), 21 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 3fa68a5cc16e..daf2afa603d3 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -130,6 +131,31 @@ out: return error; } +static int ovl_copy_fileattr(struct path *old, struct path *new) +{ + struct fileattr oldfa = { .flags_valid = true }; + struct fileattr newfa = { .flags_valid = true }; + int err; + + err = ovl_real_fileattr_get(old, &oldfa); + if (err) + return err; + + err = ovl_real_fileattr_get(new, &newfa); + if (err) + return err; + + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); + newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; + newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); + + BUILD_BUG_ON(OVL_COPY_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + newfa.fsx_xflags &= ~OVL_COPY_FSX_FLAGS_MASK; + newfa.fsx_xflags |= (oldfa.fsx_xflags & OVL_COPY_FSX_FLAGS_MASK); + + return ovl_real_fileattr_set(new, &newfa); +} + static int ovl_copy_up_data(struct ovl_fs *ofs, struct path *old, struct path *new, loff_t len) { @@ -493,20 +519,21 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) { struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb); + struct inode *inode = d_inode(c->dentry); + struct path upperpath, datapath; int err; + ovl_path_upper(c->dentry, &upperpath); + if (WARN_ON(upperpath.dentry != NULL)) + return -EIO; + + upperpath.dentry = temp; + /* * Copy up data first and then xattrs. Writing data after * xattrs will remove security.capability xattr automatically. 
*/ if (S_ISREG(c->stat.mode) && !c->metacopy) { - struct path upperpath, datapath; - - ovl_path_upper(c->dentry, &upperpath); - if (WARN_ON(upperpath.dentry != NULL)) - return -EIO; - upperpath.dentry = temp; - ovl_path_lowerdata(c->dentry, &datapath); err = ovl_copy_up_data(ofs, &datapath, &upperpath, c->stat.size); @@ -518,6 +545,16 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) if (err) return err; + if (inode->i_flags & OVL_COPY_I_FLAGS_MASK) { + /* + * Copy the fileattr inode flags that are the source of already + * copied i_flags + */ + err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + if (err) + return err; + } + /* * Store identifier of lower inode in upper inode xattr to * allow lookup of the copy up origin inode. diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 5e828a1c98a8..b288843e6b42 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -503,16 +503,14 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Introducing security_inode_fileattr_get/set() hooks would solve this issue * properly. 
*/ -static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, +static int ovl_security_fileattr(struct path *realpath, struct fileattr *fa, bool set) { - struct path realpath; struct file *file; unsigned int cmd; int err; - ovl_path_real(dentry, &realpath); - file = dentry_open(&realpath, O_RDONLY, current_cred()); + file = dentry_open(realpath, O_RDONLY, current_cred()); if (IS_ERR(file)) return PTR_ERR(file); @@ -527,11 +525,22 @@ static int ovl_security_fileattr(struct dentry *dentry, struct fileattr *fa, return err; } +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, true); + if (err) + return err; + + return vfs_fileattr_set(&init_user_ns, realpath->dentry, fa); +} + int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *upperdentry; + struct path upperpath; const struct cred *old_cred; int err; @@ -541,12 +550,10 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, err = ovl_copy_up(dentry); if (!err) { - upperdentry = ovl_dentry_upper(dentry); + ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, true); - if (!err) - err = vfs_fileattr_set(&init_user_ns, upperdentry, fa); + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); ovl_copyflags(ovl_inode_real(inode), inode); } @@ -555,17 +562,28 @@ out: return err; } +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) +{ + int err; + + err = ovl_security_fileattr(realpath, fa, false); + if (err) + return err; + + return vfs_fileattr_get(realpath->dentry, fa); +} + int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); - struct dentry *realdentry = ovl_dentry_real(dentry); + struct path realpath; const struct cred *old_cred; int err; + ovl_path_real(dentry, 
&realpath); + old_cred = ovl_override_creds(inode->i_sb); - err = ovl_security_fileattr(dentry, fa, false); - if (!err) - err = vfs_fileattr_get(realdentry, fa); + err = ovl_real_fileattr_get(&realpath, fa); revert_creds(old_cred); return err; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e5dabf7ef339..2cbebe06d9ad 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -518,9 +518,20 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) i_size_write(to, i_size_read(from)); } +/* vfs inode flags copied from real to ovl inode */ +#define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) + +/* + * fileattr flags copied from lower to upper inode on copy up. + * We cannot copy immutable/append-only flags, because that would prevevnt + * linking temp inode to upper dir. + */ +#define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) +#define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) + static inline void ovl_copyflags(struct inode *from, struct inode *to) { - unsigned int mask = S_SYNC | S_IMMUTABLE | S_APPEND | S_NOATIME; + unsigned int mask = OVL_COPY_I_FLAGS_MASK; inode_set_flags(to, from->i_flags & mask, mask); } @@ -548,6 +559,8 @@ struct dentry *ovl_create_temp(struct dentry *workdir, struct ovl_cattr *attr); extern const struct file_operations ovl_file_operations; int __init ovl_aio_request_cache_init(void); void ovl_aio_request_cache_destroy(void); +int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa); +int ovl_real_fileattr_set(struct path *realpath, struct fileattr *fa); int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa); int ovl_fileattr_set(struct user_namespace *mnt_userns, struct dentry *dentry, struct fileattr *fa); From 096a218a588d78d699adcacb6919cff4718c4cac Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sat, 19 Jun 2021 12:26:19 +0300 Subject: [PATCH 04/14] ovl: consistent behavior for immutable/append-only inodes When a 
lower file has immutable/append-only fileattr flags, the behavior of overlayfs post copy up is inconsistent. Immediately after copy up, ovl inode still has the S_IMMUTABLE/S_APPEND inode flags copied from lower inode, so vfs code still treats the ovl inode as immutable/append-only. After ovl inode evict or mount cycle, the ovl inode does not have these inode flags anymore. We cannot copy up the immutable and append-only fileattr flags, because immutable/append-only inodes cannot be linked and because overlayfs will not be able to set overlay.* xattr on the upper inodes. Instead, if any of the fileattr flags of interest exist on the lower inode, we store them in overlay.protattr xattr on the upper inode and we read the flags from xattr on lookup and on fileattr_get(). This gives consistent behavior post copy up regardless of inode eviction from cache. When user sets new fileattr flags, we update or remove the overlay.protattr xattr. Storing immutable/append-only fileattr flags in an xattr instead of upper fileattr also solves other non-standard behavior issues - overlayfs can now copy up children of "ovl-immutable" directories and lower aliases of "ovl-immutable" hardlinks. 
Reported-by: Chengguang Xu Link: https://lore.kernel.org/linux-unionfs/20201226104618.239739-1-cgxu519@mykernel.net/ Link: https://lore.kernel.org/linux-unionfs/20210210190334.1212210-5-amir73il@gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 17 +++++++- fs/overlayfs/inode.c | 50 +++++++++++++++++++++-- fs/overlayfs/overlayfs.h | 13 +++++- fs/overlayfs/util.c | 85 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 7 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index daf2afa603d3..9d8ebf0e7237 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -131,7 +131,8 @@ out: return error; } -static int ovl_copy_fileattr(struct path *old, struct path *new) +static int ovl_copy_fileattr(struct inode *inode, struct path *old, + struct path *new) { struct fileattr oldfa = { .flags_valid = true }; struct fileattr newfa = { .flags_valid = true }; @@ -145,6 +146,18 @@ static int ovl_copy_fileattr(struct path *old, struct path *new) if (err) return err; + /* + * We cannot set immutable and append-only flags on upper inode, + * because we would not be able to link upper inode to upper dir + * not set overlay private xattr on upper inode. + * Store these flags in overlay.protattr xattr instead. 
+ */ + if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { + err = ovl_set_protattr(inode, new->dentry, &oldfa); + if (err) + return err; + } + BUILD_BUG_ON(OVL_COPY_FS_FLAGS_MASK & ~FS_COMMON_FL); newfa.flags &= ~OVL_COPY_FS_FLAGS_MASK; newfa.flags |= (oldfa.flags & OVL_COPY_FS_FLAGS_MASK); @@ -550,7 +563,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp) * Copy the fileattr inode flags that are the source of already * copied i_flags */ - err = ovl_copy_fileattr(&c->lowerpath, &upperpath); + err = ovl_copy_fileattr(inode, &c->lowerpath, &upperpath); if (err) return err; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index b288843e6b42..37300e972a39 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -162,7 +162,8 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, enum ovl_path_type type; struct path realpath; const struct cred *old_cred; - bool is_dir = S_ISDIR(dentry->d_inode->i_mode); + struct inode *inode = d_inode(dentry); + bool is_dir = S_ISDIR(inode->i_mode); int fsid = 0; int err; bool metacopy_blocks = false; @@ -175,6 +176,9 @@ int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path, if (err) goto out; + /* Report the effective immutable/append-only STATX flags */ + generic_fill_statx_attr(inode, stat); + /* * For non-dir or same fs, we use st_ino of the copy up origin. * This guaranties constant st_dev/st_ino across copy up. 
@@ -542,6 +546,7 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, struct inode *inode = d_inode(dentry); struct path upperpath; const struct cred *old_cred; + unsigned int flags; int err; err = ovl_want_write(dentry); @@ -553,15 +558,49 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, ovl_path_real(dentry, &upperpath); old_cred = ovl_override_creds(inode->i_sb); - err = ovl_real_fileattr_set(&upperpath, fa); + /* + * Store immutable/append-only flags in xattr and clear them + * in upper fileattr (in case they were set by older kernel) + * so children of "ovl-immutable" directories lower aliases of + * "ovl-immutable" hardlinks could be copied up. + * Clear xattr when flags are cleared. + */ + err = ovl_set_protattr(inode, upperpath.dentry, fa); + if (!err) + err = ovl_real_fileattr_set(&upperpath, fa); revert_creds(old_cred); - ovl_copyflags(ovl_inode_real(inode), inode); + + /* + * Merge real inode flags with inode flags read from + * overlay.protattr xattr + */ + flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; + + BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); + flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; + inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); } ovl_drop_write(dentry); out: return err; } +/* Convert inode protection flags to fileattr flags */ +static void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) +{ + BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); + BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); + + if (inode->i_flags & S_APPEND) { + fa->flags |= FS_APPEND_FL; + fa->fsx_xflags |= FS_XFLAG_APPEND; + } + if (inode->i_flags & S_IMMUTABLE) { + fa->flags |= FS_IMMUTABLE_FL; + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + } +} + int ovl_real_fileattr_get(struct path *realpath, struct fileattr *fa) { int err; @@ -584,6 +623,7 @@ int ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) old_cred = ovl_override_creds(inode->i_sb); err = 
ovl_real_fileattr_get(&realpath, fa); + ovl_fileattr_prot_flags(inode, fa); revert_creds(old_cred); return err; @@ -1136,6 +1176,10 @@ struct inode *ovl_get_inode(struct super_block *sb, } } + /* Check for immutable/append-only inode flags in xattr */ + if (upperdentry) + ovl_check_protattr(inode, upperdentry); + if (inode->i_state & I_NEW) unlock_new_inode(inode); out: diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2cbebe06d9ad..2433cc030c87 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -34,6 +34,7 @@ enum ovl_xattr { OVL_XATTR_NLINK, OVL_XATTR_UPPER, OVL_XATTR_METACOPY, + OVL_XATTR_PROTATTR, }; enum ovl_inode_flag { @@ -520,14 +521,22 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to) /* vfs inode flags copied from real to ovl inode */ #define OVL_COPY_I_FLAGS_MASK (S_SYNC | S_NOATIME | S_APPEND | S_IMMUTABLE) +/* vfs inode flags read from overlay.protattr xattr to ovl inode */ +#define OVL_PROT_I_FLAGS_MASK (S_APPEND | S_IMMUTABLE) /* * fileattr flags copied from lower to upper inode on copy up. - * We cannot copy immutable/append-only flags, because that would prevevnt - * linking temp inode to upper dir. + * We cannot copy up immutable/append-only flags, because that would prevent + * linking temp inode to upper dir, so we store them in xattr instead. 
*/ #define OVL_COPY_FS_FLAGS_MASK (FS_SYNC_FL | FS_NOATIME_FL) #define OVL_COPY_FSX_FLAGS_MASK (FS_XFLAG_SYNC | FS_XFLAG_NOATIME) +#define OVL_PROT_FS_FLAGS_MASK (FS_APPEND_FL | FS_IMMUTABLE_FL) +#define OVL_PROT_FSX_FLAGS_MASK (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE) + +void ovl_check_protattr(struct inode *inode, struct dentry *upper); +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa); static inline void ovl_copyflags(struct inode *from, struct inode *to) { diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 81b8f135445a..f48284a2a896 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -585,6 +586,7 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry, #define OVL_XATTR_NLINK_POSTFIX "nlink" #define OVL_XATTR_UPPER_POSTFIX "upper" #define OVL_XATTR_METACOPY_POSTFIX "metacopy" +#define OVL_XATTR_PROTATTR_POSTFIX "protattr" #define OVL_XATTR_TAB_ENTRY(x) \ [x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \ @@ -598,6 +600,7 @@ const char *const ovl_xattr_table[][2] = { OVL_XATTR_TAB_ENTRY(OVL_XATTR_NLINK), OVL_XATTR_TAB_ENTRY(OVL_XATTR_UPPER), OVL_XATTR_TAB_ENTRY(OVL_XATTR_METACOPY), + OVL_XATTR_TAB_ENTRY(OVL_XATTR_PROTATTR), }; int ovl_check_setxattr(struct ovl_fs *ofs, struct dentry *upperdentry, @@ -639,6 +642,88 @@ int ovl_set_impure(struct dentry *dentry, struct dentry *upperdentry) return err; } + +#define OVL_PROTATTR_MAX 32 /* Reserved for future flags */ + +void ovl_check_protattr(struct inode *inode, struct dentry *upper) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + u32 iflags = inode->i_flags & OVL_PROT_I_FLAGS_MASK; + char buf[OVL_PROTATTR_MAX+1]; + int res, n; + + res = ovl_do_getxattr(ofs, upper, OVL_XATTR_PROTATTR, buf, + OVL_PROTATTR_MAX); + if (res < 0) + return; + + /* + * Initialize inode flags from overlay.protattr xattr and upper inode + * flags. 
If upper inode has those fileattr flags set (i.e. from old + * kernel), we do not clear them on ovl_get_inode(), but we will clear + * them on next fileattr_set(). + */ + for (n = 0; n < res; n++) { + if (buf[n] == 'a') + iflags |= S_APPEND; + else if (buf[n] == 'i') + iflags |= S_IMMUTABLE; + else + break; + } + + if (!res || n < res) { + pr_warn_ratelimited("incompatible overlay.protattr format (%pd2, len=%d)\n", + upper, res); + } else { + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + } +} + +int ovl_set_protattr(struct inode *inode, struct dentry *upper, + struct fileattr *fa) +{ + struct ovl_fs *ofs = OVL_FS(inode->i_sb); + char buf[OVL_PROTATTR_MAX]; + int len = 0, err = 0; + u32 iflags = 0; + + BUILD_BUG_ON(HWEIGHT32(OVL_PROT_FS_FLAGS_MASK) > OVL_PROTATTR_MAX); + + if (fa->flags & FS_APPEND_FL) { + buf[len++] = 'a'; + iflags |= S_APPEND; + } + if (fa->flags & FS_IMMUTABLE_FL) { + buf[len++] = 'i'; + iflags |= S_IMMUTABLE; + } + + /* + * Do not allow to set protection flags when upper doesn't support + * xattrs, because we do not set those fileattr flags on upper inode. + * Remove xattr if it exist and all protection flags are cleared. + */ + if (len) { + err = ovl_check_setxattr(ofs, upper, OVL_XATTR_PROTATTR, + buf, len, -EPERM); + } else if (inode->i_flags & OVL_PROT_I_FLAGS_MASK) { + err = ovl_do_removexattr(ofs, upper, OVL_XATTR_PROTATTR); + if (err == -EOPNOTSUPP || err == -ENODATA) + err = 0; + } + if (err) + return err; + + inode_set_flags(inode, iflags, OVL_PROT_I_FLAGS_MASK); + + /* Mask out the fileattr flags that should not be set in upper inode */ + fa->flags &= ~OVL_PROT_FS_FLAGS_MASK; + fa->fsx_xflags &= ~OVL_PROT_FSX_FLAGS_MASK; + + return 0; +} + /** * Caller must hold a reference to inode to prevent it from being freed while * it is marked inuse. 
From e4522bc8733dce1cb4443f1d506869781ee9caa8 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:45 +0200 Subject: [PATCH 05/14] ovl: disable decoding null uuid with redirect_dir Currently decoding origin with lower null uuid is not allowed unless user opted-in to one of the new features that require following the lower inode of non-dir upper (index, xino, metacopy). Now we add redirect_dir too to that feature list. Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b01d4147520d..97ea35fdd933 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1600,7 +1600,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * lower inode of non-dir upper. */ if (!ofs->config.index && !ofs->config.metacopy && - ofs->config.xino != OVL_XINO_ON && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && uuid_is_null(uuid)) return false; From ca45275cd6b63e917eef681f9f15978b96afdbbe Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:46 +0200 Subject: [PATCH 06/14] ovl: add ovl_allow_offline_changes() helper Allows to check whether any of extended features are enabled Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 12 ++++++++++++ fs/overlayfs/super.c | 4 +--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 2433cc030c87..e9b3e7880fc0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -263,6 +263,18 @@ static inline bool ovl_open_flags_need_copy_up(int flags) return ((OPEN_FMODE(flags) & FMODE_WRITE) || (flags & O_TRUNC)); } +static inline bool ovl_allow_offline_changes(struct ovl_fs *ofs) +{ + /* + * To avoid regressions in existing setups with 
overlay lower offline + * changes, we allow lower changes only if none of the new features + * are used. + */ + return (!ofs->config.index && !ofs->config.metacopy && + !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON); +} + + /* util.c */ int ovl_want_write(struct dentry *dentry); void ovl_drop_write(struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 97ea35fdd933..178daa5e82c9 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1599,9 +1599,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid) * user opted-in to one of the new features that require following the * lower inode of non-dir upper. */ - if (!ofs->config.index && !ofs->config.metacopy && - !ofs->config.redirect_dir && ofs->config.xino != OVL_XINO_ON && - uuid_is_null(uuid)) + if (ovl_allow_offline_changes(ofs) && uuid_is_null(uuid)) return false; for (i = 0; i < ofs->numfs; i++) { From 1fc31aac96d7060ecee18124be6de18cb2268922 Mon Sep 17 00:00:00 2001 From: Vyacheslav Yurkov Date: Thu, 27 May 2021 19:45:47 +0200 Subject: [PATCH 07/14] ovl: do not set overlay.opaque for new directories Enable optimizations only if user opted-in for any of extended features. If optimization is enabled, it breaks existing use case when a lower layer directory appears after directory was created on a merged layer. If overlay.opaque is applied, new files on lower layer are not visible. 
Consider the following scenario: - /lower and /upper are mounted to /merged - directory /merged/new-dir is created with a file test1 - overlay is unmounted - directory /lower/new-dir is created with a file test2 - overlay is mounted again If opaque is applied by default, file test2 is not going to be visible without explicitly clearing the overlay.opaque attribute. Signed-off-by: Vyacheslav Yurkov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 258434567a34..9154222883e6 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -321,6 +321,7 @@ static bool ovl_type_origin(struct dentry *dentry) static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct ovl_cattr *attr) { + struct ovl_fs *ofs = OVL_FS(dentry->d_sb); struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); struct inode *udir = upperdir->d_inode; struct dentry *newdentry; @@ -339,7 +340,8 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, if (IS_ERR(newdentry)) goto out_unlock; - if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry)) { + if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) && + !ovl_allow_offline_changes(ofs)) { /* Setting opaque here is just an optimization, allow to fail */ ovl_set_opaque(dentry, newdentry); } From ffb24e3c657869b256c3f90792d262fe09f49628 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 27 Apr 2021 13:28:26 +0300 Subject: [PATCH 08/14] ovl: relax lookup error on mismatch origin ftype We get occasional reports of lookup errors due to mismatched origin ftype from users that re-format a lower squashfs image. Commit 13c6ad0f45fd ("ovl: document lower modification caveats") tries to discourage the practice of re-formatting lower layers and describes the expected behavior as undefined. 
Commit b0e0f69731cd ("ovl: restrict lower null uuid for "xino=auto"") limits the configurations in which origin file handles are followed. In addition to these measures, change the behavior in case of detecting a mismatch origin ftype in lookup to issue a warning, not follow origin, but not fail the lookup operation either. That should make overall more users happy without any big consequences. Link: https://lore.kernel.org/linux-unionfs/CAOQ4uxgPq9E9xxwU2CDyHy-_yCZZeymg+3n+-6AqkGGE1YtwvQ@mail.gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index da063b18b419..1a9b515fc45d 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -392,7 +392,7 @@ invalid: upperdentry, d_inode(upperdentry)->i_mode & S_IFMT, d_inode(origin)->i_mode & S_IFMT); dput(origin); - return -EIO; + return -ESTALE; } static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry, From b71759ef1e1730db81dab98e9dab9455e8c7f5a2 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Sat, 24 Apr 2021 22:03:15 +0800 Subject: [PATCH 09/14] ovl: skip checking lower file's i_writecount on truncate It is possible that a directory tree is shared between multiple overlay instances as a lower layer. In this case when one instance executes a file residing on the lower layer, the other instance denies a truncate(2) call on this file. This only happens for truncate(2) and not for open(2) with the O_TRUNC flag. Fix this interference and inconsistency by removing the preliminary i_writecount check before copy-up. This means that unlike on normal filesystems truncate(argv[0]) will now succeed. If this ever causes a regression in a real world use case this needs to be revisited. 
One way to fix this properly would be to keep a correct i_writecount in the overlay inode, but that is difficult due to memory mapping code only dealing with the real file/inode. Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- Documentation/filesystems/overlayfs.rst | 3 +++ fs/overlayfs/inode.c | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/overlayfs.rst b/Documentation/filesystems/overlayfs.rst index 455ca86eb4fc..7da6c30ed596 100644 --- a/Documentation/filesystems/overlayfs.rst +++ b/Documentation/filesystems/overlayfs.rst @@ -427,6 +427,9 @@ b) If a file residing on a lower layer is opened for read-only and then memory mapped with MAP_SHARED, then subsequent changes to the file are not reflected in the memory mapping. +c) If a file residing on a lower layer is being executed, then opening that +file for write or truncating the file will not be denied with ETXTBSY. + The following options allow overlayfs to act more like a standards compliant filesystem: diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 37300e972a39..8aa370e8143a 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -33,12 +33,6 @@ int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto out; if (attr->ia_valid & ATTR_SIZE) { - struct inode *realinode = d_inode(ovl_dentry_real(dentry)); - - err = -ETXTBSY; - if (atomic_read(&realinode->i_writecount) < 0) - goto out_drop_write; - /* Truncate should trigger data copy up as well */ full_copy_up = true; } From d8991e8622e758b718e2e4291d31dd0bea4e14a4 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Wed, 10 Mar 2021 10:09:25 +0800 Subject: [PATCH 10/14] ovl: update ctime when changing fileattr Currently we keep the size, mode and times of the overlay inode the same as those of the upper inode, so ctime should be updated when changing file attributes as well.
Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 8aa370e8143a..7d52e5ef2ac7 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -573,6 +573,9 @@ int ovl_fileattr_set(struct user_namespace *mnt_userns, BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); + + /* Update ctime */ + ovl_copyattr(ovl_inode_real(inode), inode); } ovl_drop_write(dentry); out: From f945ca1963c8bd29471020d7c58c594ee7007006 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 22 Jul 2021 14:18:14 +0200 Subject: [PATCH 11/14] ovl: use kvalloc in xattr copy-up Extended attributes are usually small, but could be up to 64k in size, so use the most efficient method for doing the allocation. Signed-off-by: Miklos Szeredi --- fs/overlayfs/copy_up.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 9d8ebf0e7237..4e7d5bfa2949 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -63,7 +63,7 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old, return list_size; } - buf = kzalloc(list_size, GFP_KERNEL); + buf = kvzalloc(list_size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -106,11 +106,12 @@ retry: if (size > value_size) { void *new; - new = krealloc(value, size, GFP_KERNEL); + new = kvmalloc(size, GFP_KERNEL); if (!new) { error = -ENOMEM; break; } + kvfree(value); value = new; value_size = size; goto retry; @@ -125,9 +126,9 @@ retry: error = 0; } } - kfree(value); + kvfree(value); out: - kfree(buf); + kvfree(buf); return error; } From 52d5a0c6bd8a89f460243ed937856354f8f253a3 Mon Sep 17 00:00:00 2001 From: chenying Date: Mon, 16 Aug 2021 18:02:56 +0800 Subject: [PATCH 12/14] ovl: fix BUG_ON() in may_delete() when called from ovl_cleanup() 
If function ovl_instantiate() returns an error, ovl_cleanup will be called and try to remove newdentry from wdir, but the newdentry has been moved to udir at this time. This will cause BUG_ON(victim->d_parent->d_inode != dir) in fs/namei.c:may_delete. Signed-off-by: chenying Fixes: 01b39dcc9568 ("ovl: use inode_insert5() to hash a newly created inode") Link: https://lore.kernel.org/linux-unionfs/e6496a94-a161-dc04-c38a-d2544633acb4@bytedance.com/ Cc: # v4.18 Signed-off-by: Miklos Szeredi --- fs/overlayfs/dir.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 9154222883e6..1fefb2b8960e 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -545,8 +545,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, goto out_cleanup; } err = ovl_instantiate(dentry, inode, newdentry, hardlink); - if (err) - goto out_cleanup; + if (err) { + ovl_cleanup(udir, newdentry); + dput(newdentry); + } out_dput: dput(upper); out_unlock: From 0cad6246621b5887d5b33fea84219d2a71f2f99a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Aug 2021 22:08:24 +0200 Subject: [PATCH 13/14] vfs: add rcu argument to ->get_acl() callback Add a rcu argument to the ->get_acl() callback to allow get_cached_acl_rcu() to call the ->get_acl() method in the next patch.
Signed-off-by: Miklos Szeredi --- Documentation/filesystems/locking.rst | 2 +- Documentation/filesystems/vfs.rst | 2 +- fs/9p/acl.c | 5 ++++- fs/9p/acl.h | 2 +- fs/bad_inode.c | 2 +- fs/btrfs/acl.c | 5 ++++- fs/btrfs/ctree.h | 2 +- fs/ceph/acl.c | 5 ++++- fs/ceph/super.h | 2 +- fs/erofs/xattr.c | 5 ++++- fs/erofs/xattr.h | 2 +- fs/ext2/acl.c | 5 ++++- fs/ext2/acl.h | 2 +- fs/ext4/acl.c | 5 ++++- fs/ext4/acl.h | 2 +- fs/f2fs/acl.c | 5 ++++- fs/f2fs/acl.h | 2 +- fs/fuse/acl.c | 5 ++++- fs/fuse/fuse_i.h | 2 +- fs/gfs2/acl.c | 5 ++++- fs/gfs2/acl.h | 2 +- fs/jffs2/acl.c | 5 ++++- fs/jffs2/acl.h | 2 +- fs/jfs/acl.c | 5 ++++- fs/jfs/jfs_acl.h | 2 +- fs/nfs/nfs3_fs.h | 2 +- fs/nfs/nfs3acl.c | 5 ++++- fs/ocfs2/acl.c | 5 ++++- fs/ocfs2/acl.h | 2 +- fs/orangefs/acl.c | 5 ++++- fs/orangefs/orangefs-kernel.h | 2 +- fs/overlayfs/inode.c | 5 ++++- fs/overlayfs/overlayfs.h | 2 +- fs/posix_acl.c | 2 +- fs/reiserfs/acl.h | 2 +- fs/reiserfs/xattr_acl.c | 5 ++++- fs/xfs/xfs_acl.c | 5 ++++- fs/xfs/xfs_acl.h | 4 ++-- include/linux/fs.h | 2 +- 39 files changed, 91 insertions(+), 40 deletions(-) diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 2183fd8cc350..899fa9aba01a 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -70,7 +70,7 @@ prototypes:: const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, unsigned int); - int (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 14c31eced416..bf5c48066fac 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ 
-432,7 +432,7 @@ As of kernel 2.6.22, the following members are defined: const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - int (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *); int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); diff --git a/fs/9p/acl.c b/fs/9p/acl.c index bb1b286c49ae..c381499f5416 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -97,10 +97,13 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type) +struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu) { struct v9fs_session_info *v9ses; + if (rcu) + return ERR_PTR(-ECHILD); + v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { diff --git a/fs/9p/acl.h b/fs/9p/acl.h index e4f7e882272b..d43c8949e807 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -16,7 +16,7 @@ #ifdef CONFIG_9P_FS_POSIX_ACL extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); +extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type, bool rcu); extern int v9fs_acl_chmod(struct inode *, struct p9_fid *); extern int v9fs_set_create_acl(struct inode *, struct p9_fid *, struct posix_acl *, struct posix_acl *); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 48e16144c1f7..12b8fdcc445b 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -121,7 +121,7 @@ static const char *bad_inode_get_link(struct dentry *dentry, return ERR_PTR(-EIO); } -static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type) 
+static struct posix_acl *bad_inode_get_acl(struct inode *inode, int type, bool rcu) { return ERR_PTR(-EIO); } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index d95eb5c8cb37..3d00bb5deded 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -16,13 +16,16 @@ #include "btrfs_inode.h" #include "xattr.h" -struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu) { int size; const char *name; char *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e5e53e592d4f..ca5c7cb1b729 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3686,7 +3686,7 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -struct posix_acl *btrfs_get_acl(struct inode *inode, int type); +struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int btrfs_init_acl(struct btrfs_trans_handle *trans, diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 529af59d9fd3..f4fc8e0b847c 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -29,7 +29,7 @@ static inline void ceph_set_cached_acl(struct inode *inode, spin_unlock(&ci->i_ceph_lock); } -struct posix_acl *ceph_get_acl(struct inode *inode, int type) +struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu) { int size; unsigned int retry_cnt = 0; @@ -37,6 +37,9 @@ struct posix_acl *ceph_get_acl(struct inode *inode, int type) char *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9215a2f4535c..b9512684e150 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1087,7 
+1087,7 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx); /* acl.c */ #ifdef CONFIG_CEPH_FS_POSIX_ACL -struct posix_acl *ceph_get_acl(struct inode *, int); +struct posix_acl *ceph_get_acl(struct inode *, int, bool); int ceph_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int ceph_pre_init_acls(struct inode *dir, umode_t *mode, diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 8dd54b420a1d..778f2c52295d 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -673,12 +673,15 @@ ssize_t erofs_listxattr(struct dentry *dentry, } #ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type) +struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; int prefix, rc; char *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 366dcb400525..94090c74b3f7 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -80,7 +80,7 @@ static inline int erofs_getxattr(struct inode *inode, int index, #endif /* !CONFIG_EROFS_FS_XATTR */ #ifdef CONFIG_EROFS_FS_POSIX_ACL -struct posix_acl *erofs_get_acl(struct inode *inode, int type); +struct posix_acl *erofs_get_acl(struct inode *inode, int type, bool rcu); #else #define erofs_get_acl (NULL) #endif diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index b9a9db98e94b..bf298967c5b8 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -141,13 +141,16 @@ fail: * inode->i_mutex: don't care */ struct posix_acl * -ext2_get_acl(struct inode *inode, int type) +ext2_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 
917db5f6630a..925ab6287d35 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -55,7 +55,7 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern struct posix_acl *ext2_get_acl(struct inode *inode, int type); +extern struct posix_acl *ext2_get_acl(struct inode *inode, int type, bool rcu); extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index c5eaffccecc3..0613dfcbfd4a 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -142,13 +142,16 @@ fail: * inode->i_mutex: don't care */ struct posix_acl * -ext4_get_acl(struct inode *inode, int type) +ext4_get_acl(struct inode *inode, int type, bool rcu) { int name_index; char *value = NULL; struct posix_acl *acl; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 84b8942a57f2..3219669732bf 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -55,7 +55,7 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -struct posix_acl *ext4_get_acl(struct inode *inode, int type); +struct posix_acl *ext4_get_acl(struct inode *inode, int type, bool rcu); int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 239ad9453b99..16e826e01f09 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -196,8 +196,11 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, return acl; } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu) { + if (rcu) + return ERR_PTR(-ECHILD); + return __f2fs_get_acl(inode, type, 
NULL); } diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 986fd1bc780b..a26e33cab4ff 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -33,7 +33,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *, int); +extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); extern int f2fs_set_acl(struct user_namespace *, struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 52b165319be1..337cb29a8dd5 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -11,7 +11,7 @@ #include #include -struct posix_acl *fuse_get_acl(struct inode *inode, int type) +struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu) { struct fuse_conn *fc = get_fuse_conn(inode); int size; @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (fuse_is_bad(inode)) return ERR_PTR(-EIO); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 07829ce78695..f4140943311a 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1216,7 +1216,7 @@ extern const struct xattr_handler *fuse_acl_xattr_handlers[]; extern const struct xattr_handler *fuse_no_acl_xattr_handlers[]; struct posix_acl; -struct posix_acl *fuse_get_acl(struct inode *inode, int type); +struct posix_acl *fuse_get_acl(struct inode *inode, int type, bool rcu); int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 9165d70ead07..734d1f05d823 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -57,13 +57,16 @@ static struct posix_acl *__gfs2_get_acl(struct inode *inode, int type) return acl; } -struct posix_acl *gfs2_get_acl(struct inode *inode, int type) +struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu) { struct gfs2_inode *ip = 
GFS2_I(inode); struct gfs2_holder gh; bool need_unlock = false; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { int ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index eccc6a43326c..cd180ca7c959 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -11,7 +11,7 @@ #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) -extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); +extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type, bool rcu); extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 55a79df70d24..e945e3484788 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -173,12 +173,15 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -struct posix_acl *jffs2_get_acl(struct inode *inode, int type) +struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; char *value = NULL; int rc, xprefix; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: xprefix = JFFS2_XPREFIX_ACL_ACCESS; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 62c50da9d493..9d9fb7cf093e 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -27,7 +27,7 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -struct posix_acl *jffs2_get_acl(struct inode *inode, int type); +struct posix_acl *jffs2_get_acl(struct inode *inode, int type, bool rcu); int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *); diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 43c285c3d2a7..a653f34c6e26 100644 --- 
a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -14,13 +14,16 @@ #include "jfs_xattr.h" #include "jfs_acl.h" -struct posix_acl *jfs_get_acl(struct inode *inode, int type) +struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; char *ea_name; int size; char *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch(type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 7ae389a7a366..3de40286d31f 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -7,7 +7,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -struct posix_acl *jfs_get_acl(struct inode *inode, int type); +struct posix_acl *jfs_get_acl(struct inode *inode, int type, bool rcu); int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index c8a192802dda..03a4e679fd99 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -11,7 +11,7 @@ * nfs3acl.c */ #ifdef CONFIG_NFS_V3_ACL -extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); +extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu); extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 9ec560aa4a50..93de0b58647a 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -44,7 +44,7 @@ static void nfs3_abort_get_acl(struct posix_acl **p) cmpxchg(p, sentinel, ACL_NOT_CACHED); } -struct posix_acl *nfs3_get_acl(struct inode *inode, int type) +struct posix_acl *nfs3_get_acl(struct inode *inode, int type, bool rcu) { struct nfs_server *server = NFS_SERVER(inode); struct page *pages[NFSACL_MAXPAGES] = { }; @@ -62,6 +62,9 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) }; int status, 
count; + if (rcu) + return ERR_PTR(-ECHILD); + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) return ERR_PTR(-EOPNOTSUPP); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 5c72a7e6d6c5..23a72a423955 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -289,7 +289,7 @@ unlock: return status; } -struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu) { struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; @@ -297,6 +297,9 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) int had_lock; struct ocfs2_lock_holder oh; + if (rcu) + return ERR_PTR(-ECHILD); + osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) return NULL; diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index f59d8d0a61fa..95a57c888ab6 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -16,7 +16,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type); +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type, bool rcu); int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *); diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c index 18852b9ed82b..605e5a3506ec 100644 --- a/fs/orangefs/acl.c +++ b/fs/orangefs/acl.c @@ -10,12 +10,15 @@ #include "orangefs-bufmap.h" #include -struct posix_acl *orangefs_get_acl(struct inode *inode, int type) +struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu) { struct posix_acl *acl; int ret; char *key = NULL, *value = NULL; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: key = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 0e6b97682e41..b5940ec1836a 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -106,7 +106,7 
@@ enum orangefs_vfs_op_states { extern int orangefs_init_acl(struct inode *inode, struct inode *dir); extern const struct xattr_handler *orangefs_xattr_handlers[]; -extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type); +extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type, bool rcu); extern int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 7d52e5ef2ac7..ea335d3e55cf 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -446,12 +446,15 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return res; } -struct posix_acl *ovl_get_acl(struct inode *inode, int type) +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) { struct inode *realinode = ovl_inode_real(inode); const struct cred *old_cred; struct posix_acl *acl; + if (rcu) + return ERR_PTR(-ECHILD); + if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index e9b3e7880fc0..3894f3347955 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -498,7 +498,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); -struct posix_acl *ovl_get_acl(struct inode *inode, int type); +struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu); int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags); bool ovl_is_private_xattr(struct super_block *sb, const char *name); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index f3309a7edb49..4f1ef826e040 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -138,7 +138,7 @@ struct posix_acl *get_acl(struct inode *inode, int type) 
set_cached_acl(inode, type, NULL); return NULL; } - acl = inode->i_op->get_acl(inode, type); + acl = inode->i_op->get_acl(inode, type, false); if (IS_ERR(acl)) { /* diff --git a/fs/reiserfs/acl.h b/fs/reiserfs/acl.h index fd58618da360..d9052b8ce6dd 100644 --- a/fs/reiserfs/acl.h +++ b/fs/reiserfs/acl.h @@ -48,7 +48,7 @@ static inline int reiserfs_acl_count(size_t size) } #ifdef CONFIG_REISERFS_FS_POSIX_ACL -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type); +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu); int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); int reiserfs_acl_chmod(struct inode *inode); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index a9547144a099..d6fcddc46f5b 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -190,13 +190,16 @@ fail: * inode->i_mutex: down * BKL held [before 2.5.x] */ -struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) +struct posix_acl *reiserfs_get_acl(struct inode *inode, int type, bool rcu) { char *name, *value; struct posix_acl *acl; int size; int retval; + if (rcu) + return ERR_PTR(-ECHILD); + switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index d02bef24b32b..9e8ac9fa9666 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -125,7 +125,7 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) } struct posix_acl * -xfs_get_acl(struct inode *inode, int type) +xfs_get_acl(struct inode *inode, int type, bool rcu) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; @@ -137,6 +137,9 @@ xfs_get_acl(struct inode *inode, int type) }; int error; + if (rcu) + return ERR_PTR(-ECHILD); + trace_xfs_get_acl(ip); switch (type) { diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 7bdb3a4ed798..bb6abdcb265d 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -10,13 
+10,13 @@ struct inode; struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL -extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); +extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); #else -static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type) +static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu) { return NULL; } diff --git a/include/linux/fs.h b/include/linux/fs.h index ae6c6c34db94..73376dfe28d0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2065,7 +2065,7 @@ struct inode_operations { struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *); int (*permission) (struct user_namespace *, struct inode *, int); - struct posix_acl * (*get_acl)(struct inode *, int); + struct posix_acl * (*get_acl)(struct inode *, int, bool); int (*readlink) (struct dentry *, char __user *,int); From 332f606b32b6291a944c8cf23b91f53a6e676525 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 18 Aug 2021 22:08:24 +0200 Subject: [PATCH 14/14] ovl: enable RCU'd ->get_acl() Overlayfs does not cache ACL's (to avoid double caching). Instead it just calls the underlying filesystem's i_op->get_acl(), which will return the cached value, if possible. In rcu path walk, however, get_cached_acl_rcu() is employed to get the value from the cache, which will fail on overlayfs resulting in dropping out of rcu walk mode. This can result in a big performance hit in certain situations. 
Fix by calling ->get_acl() with rcu=true in case of ACL_DONT_CACHE (which indicates pass-through) Reported-by: garyhuang Signed-off-by: Miklos Szeredi --- fs/overlayfs/inode.c | 7 ++++--- fs/posix_acl.c | 13 ++++++++++++- include/linux/fs.h | 5 +++++ include/linux/posix_acl.h | 3 ++- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ea335d3e55cf..832b17589733 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "overlayfs.h" @@ -452,12 +453,12 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) const struct cred *old_cred; struct posix_acl *acl; - if (rcu) - return ERR_PTR(-ECHILD); - if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; + if (rcu) + return get_cached_acl_rcu(realinode, type); + old_cred = ovl_override_creds(inode->i_sb); acl = get_acl(realinode, type); revert_creds(old_cred); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 4f1ef826e040..f5c25f580dd9 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -22,6 +22,7 @@ #include #include #include +#include static struct posix_acl **acl_by_type(struct inode *inode, int type) { @@ -56,7 +57,17 @@ EXPORT_SYMBOL(get_cached_acl); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type) { - return rcu_dereference(*acl_by_type(inode, type)); + struct posix_acl *acl = rcu_dereference(*acl_by_type(inode, type)); + + if (acl == ACL_DONT_CACHE) { + struct posix_acl *ret; + + ret = inode->i_op->get_acl(inode, type, LOOKUP_RCU); + if (!IS_ERR(ret)) + acl = ret; + } + + return acl; } EXPORT_SYMBOL(get_cached_acl_rcu); diff --git a/include/linux/fs.h b/include/linux/fs.h index 73376dfe28d0..c6e5bcbff0c0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -581,6 +581,11 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +/* + * 
ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to + * cache the ACL. This also means that ->get_acl() can be called in RCU mode + * with the LOOKUP_RCU flag. + */ #define ACL_DONT_CACHE ((void *)(-3)) static inline struct posix_acl * diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 307094ebb88c..b65c877d92b8 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -72,6 +72,8 @@ extern struct posix_acl *get_posix_acl(struct inode *, int); extern int set_posix_acl(struct user_namespace *, struct inode *, int, struct posix_acl *); +struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); + #ifdef CONFIG_FS_POSIX_ACL int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t); extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, @@ -84,7 +86,6 @@ extern int simple_set_acl(struct user_namespace *, struct inode *, extern int simple_acl_create(struct inode *, struct inode *); struct posix_acl *get_cached_acl(struct inode *inode, int type); -struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); void forget_cached_acl(struct inode *inode, int type); void forget_all_cached_acls(struct inode *inode);