From 973e5245637accc4002843f6b888495a6a7762bc Mon Sep 17 00:00:00 2001 From: Hu Weiwen Date: Mon, 22 Nov 2021 22:22:12 +0800 Subject: [PATCH 1/4] ceph: fix duplicate increment of opened_inodes metric opened_inodes is incremented twice when the same inode is opened twice with O_RDONLY and O_WRONLY respectively. To reproduce, run this python script, then check the metrics: import os for _ in range(10000): fd_r = os.open('a', os.O_RDONLY) fd_w = os.open('a', os.O_WRONLY) os.close(fd_r) os.close(fd_w) Fixes: 1dd8d4708136 ("ceph: metrics for opened files, pinned caps and opened inodes") Signed-off-by: Hu Weiwen Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b9460b6fb76f..c447fa2e2d1f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4350,7 +4350,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; - bool is_opened = false; + bool already_opened = false; int i; if (count == 1) @@ -4358,19 +4358,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { - if (bits & (1 << i)) - ci->i_nr_by_mode[i] += count; - /* - * If any of the mode ref is larger than 1, + * If any of the mode ref is larger than 0, * that means it has been already opened by * others. Just skip checking the PIN ref. */ - if (i && ci->i_nr_by_mode[i] > 1) - is_opened = true; + if (i && ci->i_nr_by_mode[i]) + already_opened = true; + + if (bits & (1 << i)) + ci->i_nr_by_mode[i] += count; } - if (!is_opened) + if (!already_opened) percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } From e485d028bb1075d6167558b47f63e10713ad2034 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 23 Nov 2021 07:30:38 -0500 Subject: [PATCH 2/4] ceph: initialize i_size variable in ceph_sync_read Newer compilers seem to determine that this variable being uninitialized isn't a problem, but older compilers (from the RHEL8 era) seem to choke on it and complain that it could be used uninitialized. Go ahead and initialize the variable at declaration time to silence potential compiler warnings. Fixes: c3d8e0b5de48 ("ceph: return the real size read when it hits EOF") Signed-off-by: Jeff Layton Reviewed-by: Xiubo Li Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 02a0a0fd9ccd..b24442e27e4e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -847,7 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to, ssize_t ret; u64 off = iocb->ki_pos; u64 len = iov_iter_count(to); - u64 i_size; + u64 i_size = i_size_read(inode); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); From ee2a095d3b24f300a5e11944d208801e928f108c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 30 Nov 2021 19:20:34 +0800 Subject: [PATCH 3/4] ceph: initialize pathlen variable in reconnect_caps_cb The smatch static checker warned about an uninitialized symbol usage in this function, in the case where ceph_mdsc_build_path returns an error. It turns out that that case is harmless, but it just looks sketchy. Initialize the variable at declaration time, and remove the unneeded setting of it later. Fixes: a33f6432b3a6 ("ceph: encode inodes' parent/d_name in cap reconnect message") Reported-by: Dan Carpenter Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/mds_client.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 250aad330a10..c30eefc0ac19 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3683,7 +3683,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; char *path; - int pathlen, err; + int pathlen = 0, err; u64 pathbase; u64 snap_follows; @@ -3703,7 +3703,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, } } else { path = NULL; - pathlen = 0; pathbase = 0; } From fd84bfdddd169c219c3a637889a8b87f70a072c2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 29 Nov 2021 12:16:39 +0100 Subject: [PATCH 4/4] ceph: fix up non-directory creation in SGID directories Ceph always inherits the SGID bit if it is set on the parent inode, while the generic inode_init_owner does not do this in a few cases where it can create a possible security problem (cf. [1]). Update ceph to strip the SGID bit just as inode_init_owner would. This bug was detected by the mapped mount testsuite in [3]. The testsuite tests all core VFS functionality and semantics with and without mapped mounts. That is to say it functions as a generic VFS testsuite in addition to a mapped mount testsuite. While working on mapped mount support for ceph, SIGD inheritance was the only failing test for ceph after the port. The same bug was detected by the mapped mount testsuite in XFS in January 2021 (cf. [2]). [1]: commit 0fa3ecd87848 ("Fix up non-directory creation in SGID directories") [2]: commit 01ea173e103e ("xfs: fix up non-directory creation in SGID directories") [3]: https://git.kernel.org/fs/xfs/xfstests-dev.git Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b24442e27e4e..c138e8126286 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -605,13 +605,25 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino); in.cap.flags = CEPH_CAP_FLAG_AUTH; in.ctime = in.mtime = in.atime = iinfo.btime; - in.mode = cpu_to_le32((u32)mode); in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); - in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ? - dir->i_gid : current_fsgid())); + if (dir->i_mode & S_ISGID) { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); + + /* Directories always inherit the setgid bit. */ + if (S_ISDIR(mode)) + mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(dir->i_gid) && + !capable_wrt_inode_uidgid(&init_user_ns, dir, CAP_FSETID)) + mode &= ~S_ISGID; + } else { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + } + in.mode = cpu_to_le32((u32)mode); + in.nlink = cpu_to_le32(1); in.max_size = cpu_to_le64(lo->stripe_unit);