From 1e9d63331f8fa556f31e1406ab12f2a1e5cdb495 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:44 -0800 Subject: [PATCH 01/10] fs: correctly document the inode dirty flags The documentation for I_DIRTY_SYNC and I_DIRTY_DATASYNC is a bit misleading, and I_DIRTY_TIME isn't documented at all. Fix this. Link: https://lore.kernel.org/r/20210112190253.64307-3-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- include/linux/fs.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index fd47deea7c17..45a0303b2aeb 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2084,8 +2084,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, /* * Inode state bits. Protected by inode->i_lock * - * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, - * I_DIRTY_DATASYNC and I_DIRTY_PAGES. + * Four bits determine the dirty state of the inode: I_DIRTY_SYNC, + * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME. * * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at @@ -2094,12 +2094,20 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * Two bits are used for locking and completion notification, I_NEW and I_SYNC. * * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on - * fdatasync(). i_atime is the usual cause. - * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of + * fdatasync() (unless I_DIRTY_DATASYNC is also set). + * Timestamp updates are the usual cause. + * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of * these changes separately from I_DIRTY_SYNC so that we * don't have to write inode on fdatasync() when only - * mtime has changed in it. + * e.g. the timestamps have changed. * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. + * I_DIRTY_TIME The inode itself only has dirty timestamps, and the + * lazytime mount option is enabled. We keep track of this + * separately from I_DIRTY_SYNC in order to implement + * lazytime. This gets cleared if I_DIRTY_INODE + * (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e. + * either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in + * i_state, but not both. I_DIRTY_PAGES may still be set. * I_NEW Serves as both a mutex and completion notification. * New inodes set I_NEW. If two processes both create * the same inode, one of them will release its inode and From e20b14db050ae25ef7c5a2d9a41dd62b7e5e75c1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:45 -0800 Subject: [PATCH 02/10] fs: only specify I_DIRTY_TIME when needed in generic_update_time() generic_update_time() always passes I_DIRTY_TIME to __mark_inode_dirty(), which doesn't really make sense because (a) generic_update_time() might be asked to do only an i_version update, not also a timestamps update; and (b) I_DIRTY_TIME is only supposed to be set in i_state if the filesystem has lazytime enabled, so using it unconditionally in generic_update_time() is inconsistent. As a result there is a weird edge case where if only an i_version update was requested (not also a timestamps update) but it is no longer needed (i.e. inode_maybe_inc_iversion() returns false), then I_DIRTY_TIME will be set in i_state even if the filesystem isn't mounted with lazytime. Fix this by only passing I_DIRTY_TIME to __mark_inode_dirty() if the timestamps were updated and the filesystem has lazytime enabled. Link: https://lore.kernel.org/r/20210112190253.64307-4-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/inode.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 6442d97d9a4a..d0fa43d8e9d5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1743,24 +1743,26 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, int generic_update_time(struct inode *inode, struct timespec64 *time, int flags) { - int iflags = I_DIRTY_TIME; - bool dirty = false; + int dirty_flags = 0; - if (flags & S_ATIME) - inode->i_atime = *time; - if (flags & S_VERSION) - dirty = inode_maybe_inc_iversion(inode, false); - if (flags & S_CTIME) - inode->i_ctime = *time; - if (flags & S_MTIME) - inode->i_mtime = *time; - if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && - !(inode->i_sb->s_flags & SB_LAZYTIME)) - dirty = true; + if (flags & (S_ATIME | S_CTIME | S_MTIME)) { + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; - if (dirty) - iflags |= I_DIRTY_SYNC; - __mark_inode_dirty(inode, iflags); + if (inode->i_sb->s_flags & SB_LAZYTIME) + dirty_flags |= I_DIRTY_TIME; + else + dirty_flags |= I_DIRTY_SYNC; + } + + if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) + dirty_flags |= I_DIRTY_SYNC; + + __mark_inode_dirty(inode, dirty_flags); return 0; } EXPORT_SYMBOL(generic_update_time); From ff4136e64d129c4b617331d6c84a3e1781dda70d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:46 -0800 Subject: [PATCH 03/10] fat: only specify I_DIRTY_TIME when needed in fat_update_time() As was done for generic_update_time(), only pass I_DIRTY_TIME to __mark_inode_dirty() when the inode's timestamps were actually updated and lazytime is enabled. This avoids a weird edge case where I_DIRTY_TIME could be set in i_state when lazytime isn't enabled. Link: https://lore.kernel.org/r/20210112190253.64307-5-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/fat/misc.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/fat/misc.c b/fs/fat/misc.c index f1b2a1fc2a6a..18a50a46b57f 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -329,22 +329,23 @@ EXPORT_SYMBOL_GPL(fat_truncate_time); int fat_update_time(struct inode *inode, struct timespec64 *now, int flags) { - int iflags = I_DIRTY_TIME; - bool dirty = false; + int dirty_flags = 0; if (inode->i_ino == MSDOS_ROOT_INO) return 0; - fat_truncate_time(inode, now, flags); - if (flags & S_VERSION) - dirty = inode_maybe_inc_iversion(inode, false); - if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && - !(inode->i_sb->s_flags & SB_LAZYTIME)) - dirty = true; + if (flags & (S_ATIME | S_CTIME | S_MTIME)) { + fat_truncate_time(inode, now, flags); + if (inode->i_sb->s_flags & SB_LAZYTIME) + dirty_flags |= I_DIRTY_TIME; + else + dirty_flags |= I_DIRTY_SYNC; + } - if (dirty) - iflags |= I_DIRTY_SYNC; - __mark_inode_dirty(inode, iflags); + if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false)) + dirty_flags |= I_DIRTY_SYNC; + + __mark_inode_dirty(inode, dirty_flags); return 0; } EXPORT_SYMBOL_GPL(fat_update_time); From e2728c5621fd9c68c65a6647875a1d1c67b9f257 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:47 -0800 Subject: [PATCH 04/10] fs: don't call ->dirty_inode for lazytime timestamp updates There is no need to call ->dirty_inode for lazytime timestamp updates (i.e. for __mark_inode_dirty(I_DIRTY_TIME)), since by the definition of lazytime, filesystems must ignore these updates. Filesystems only need to care about the updated timestamps when they expire. Therefore, only call ->dirty_inode when I_DIRTY_INODE is set. Based on a patch from Christoph Hellwig: https://lore.kernel.org/r/20200325122825.1086872-4-hch@lst.de Link: https://lore.kernel.org/r/20210112190253.64307-6-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/ext4/inode.c | 12 +----------- fs/f2fs/super.c | 3 --- fs/fs-writeback.c | 6 +++--- fs/gfs2/super.c | 2 -- 4 files changed, 4 insertions(+), 19 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 27946882d4ce..4cc6c7834312 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5933,26 +5933,16 @@ out: * If the inode is marked synchronous, we don't honour that here - doing * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. - * - * If only the I_DIRTY_TIME flag is set, we can skip everything. If - * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need - * to copy into the on-disk inode structure are the timestamp files. */ void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; - if (flags == I_DIRTY_TIME) - return; handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) - goto out; - + return; ext4_mark_inode_dirty(handle, inode); - ext4_journal_stop(handle); -out: - return; } int ext4_change_inode_journal_flag(struct inode *inode, int val) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b4a07fe62d1a..cc98dc49f4a2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1196,9 +1196,6 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) inode->i_ino == F2FS_META_INO(sbi)) return; - if (flags == I_DIRTY_TIME) - return; - if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) clear_inode_flag(inode, FI_AUTO_RECOVER); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c41cb887eb7d..b7616bbd5533 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2255,16 +2255,16 @@ void __mark_inode_dirty(struct inode *inode, int flags) * Don't do this for I_DIRTY_PAGES - that doesn't actually * dirty the inode itself */ - if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) { + if (flags & I_DIRTY_INODE) { trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags); trace_writeback_dirty_inode(inode, flags); - } - if (flags & I_DIRTY_INODE) + flags &= ~I_DIRTY_TIME; + } dirtytime = flags & I_DIRTY_TIME; /* diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c04..042b94288ff1 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -562,8 +562,6 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int need_endtrans = 0; int ret; - if (!(flags & I_DIRTY_INODE)) - return; if (unlikely(gfs2_withdrawn(sdp))) return; if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { From a38ed483a72672ee6bdb5d8cf17fc0838377baa0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:48 -0800 Subject: [PATCH 05/10] fs: pass only I_DIRTY_INODE flags to ->dirty_inode ->dirty_inode is now only called when I_DIRTY_INODE (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) is set. However it may still be passed other dirty flags at the same time, provided that these other flags happened to be passed to __mark_inode_dirty() at the same time as I_DIRTY_INODE. This doesn't make sense because there is no reason for filesystems to care about these extra flags. Nor are filesystems notified about all updates to these other flags. Therefore, mask the flags before passing them to ->dirty_inode. Also properly document ->dirty_inode in vfs.rst. Link: https://lore.kernel.org/r/20210112190253.64307-7-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- Documentation/filesystems/vfs.rst | 5 ++++- fs/fs-writeback.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index ca52c82e5bb5..287b80948a40 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -270,7 +270,10 @@ or bottom half). ->alloc_inode. ``dirty_inode`` - this method is called by the VFS to mark an inode dirty. + this method is called by the VFS when an inode is marked dirty. + This is specifically for the inode itself being marked dirty, + not its data. If the update needs to be persisted by fdatasync(), + then I_DIRTY_DATASYNC will be set in the flags argument. ``write_inode`` this method is called when the VFS needs to write an inode to diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b7616bbd5533..2e6064012f7d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2259,7 +2259,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) - sb->s_op->dirty_inode(inode, flags); + sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); trace_writeback_dirty_inode(inode, flags); From 35d14f278e530ecb635ab00de984065ed90ee12f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:49 -0800 Subject: [PATCH 06/10] fs: clean up __mark_inode_dirty() a bit Improve some comments, and don't bother checking for the I_DIRTY_TIME flag in the case where we just cleared it. Also, warn if I_DIRTY_TIME and I_DIRTY_PAGES are passed to __mark_inode_dirty() at the same time, as this case isn't handled. Link: https://lore.kernel.org/r/20210112190253.64307-8-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/fs-writeback.c | 49 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 2e6064012f7d..80ee9816d9df 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2219,23 +2219,24 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) } /** - * __mark_inode_dirty - internal function + * __mark_inode_dirty - internal function to mark an inode dirty * * @inode: inode to mark - * @flags: what kind of dirty (i.e. I_DIRTY_SYNC) + * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of + * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined + * with I_DIRTY_PAGES. * - * Mark an inode as dirty. Callers should use mark_inode_dirty or - * mark_inode_dirty_sync. + * Mark an inode as dirty. We notify the filesystem, then update the inode's + * dirty flags. Then, if needed we add the inode to the appropriate dirty list. * - * Put the inode on the super block's dirty list. + * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync() + * instead of calling this directly. * - * CAREFUL! We mark it dirty unconditionally, but move it onto the - * dirty list only if it is hashed or if it refers to a blockdev. - * If it was not hashed, it will never be added to the dirty list - * even if it is later hashed, as it will have been marked dirty already. + * CAREFUL! We only add the inode to the dirty list if it is hashed or if it + * refers to a blockdev. Unhashed inodes will never be added to the dirty list + * even if they are later hashed, as they will have been marked dirty already. * - * In short, make sure you hash any inodes _before_ you start marking - * them dirty. + * In short, ensure you hash any inodes _before_ you start marking them dirty. * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of @@ -2247,25 +2248,34 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; - int dirtytime; + int dirtytime = 0; trace_writeback_mark_inode_dirty(inode, flags); - /* - * Don't do this for I_DIRTY_PAGES - that doesn't actually - * dirty the inode itself - */ if (flags & I_DIRTY_INODE) { + /* + * Notify the filesystem about the inode being dirtied, so that + * (if needed) it can update on-disk fields and journal the + * inode. This is only needed when the inode itself is being + * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not + * for just I_DIRTY_PAGES or I_DIRTY_TIME. + */ trace_writeback_dirty_inode_start(inode, flags); - if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE); - trace_writeback_dirty_inode(inode, flags); + /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ flags &= ~I_DIRTY_TIME; + } else { + /* + * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing. + * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME + * in one call to __mark_inode_dirty().) + */ + dirtytime = flags & I_DIRTY_TIME; + WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME); } - dirtytime = flags & I_DIRTY_TIME; /* * Paired with smp_mb() in __writeback_single_inode() for the @@ -2288,6 +2298,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode_attach_wb(inode, NULL); + /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */ if (flags & I_DIRTY_INODE) inode->i_state &= ~I_DIRTY_TIME; inode->i_state |= flags; From 83dc881d678a8e973212afa0c5e4fb2fe1b06d4e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:50 -0800 Subject: [PATCH 07/10] fs: drop redundant check from __writeback_single_inode() wbc->for_sync implies wbc->sync_mode == WB_SYNC_ALL, so there's no need to check for both. Just check for WB_SYNC_ALL. Link: https://lore.kernel.org/r/20210112190253.64307-9-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 80ee9816d9df..cee1df6e3bd4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1479,7 +1479,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * change I_DIRTY_TIME into I_DIRTY_SYNC. */ if ((inode->i_state & I_DIRTY_TIME) && - (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync || + (wbc->sync_mode == WB_SYNC_ALL || time_after(jiffies, inode->dirtied_time_when + dirtytime_expire_interval * HZ))) { trace_writeback_lazytime(inode); From da0c4c60d8c7c8cc5266b0c5fe7bdf136e5e6415 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:51 -0800 Subject: [PATCH 08/10] fs: improve comments for writeback_single_inode() Some comments for writeback_single_inode() and __writeback_single_inode() are outdated or not very helpful, especially with regards to writeback list handling. Update them. Link: https://lore.kernel.org/r/20210112190253.64307-10-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/fs-writeback.c | 57 +++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index cee1df6e3bd4..e91980f49388 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1442,9 +1442,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } /* - * Write out an inode and its dirty pages. Do not update the writeback list - * linkage. That is left to the caller. The caller is also responsible for - * setting I_SYNC flag and calling inode_sync_complete() to clear it. + * Write out an inode and its dirty pages (or some of its dirty pages, depending + * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state. + * + * This doesn't remove the inode from the writeback list it is on, except + * potentially to move it from b_dirty_time to b_dirty due to timestamp + * expiration. The caller is otherwise responsible for writeback list handling. + * + * The caller is also responsible for setting the I_SYNC flag beforehand and + * calling inode_sync_complete() to clear it afterwards. */ static int __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) @@ -1487,9 +1493,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Some filesystems may redirty the inode during the writeback - * due to delalloc, clear dirty metadata flags right before - * write_inode() + * Get and clear the dirty flags from i_state. This needs to be done + * after calling writepages because some filesystems may redirty the + * inode during writepages due to delalloc. It also needs to be done + * after handling timestamp expiration, as that may dirty the inode too. */ spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; @@ -1524,12 +1531,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Write out an inode's dirty pages. Either the caller has an active reference - * on the inode or the inode has I_WILL_FREE set. + * Write out an inode's dirty data and metadata on-demand, i.e. separately from + * the regular batched writeback done by the flusher threads in + * writeback_sb_inodes(). @wbc controls various aspects of the write, such as + * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE). * - * This function is designed to be called for writing back one inode which - * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode() - * and does more profound writeback list handling in writeback_sb_inodes(). + * To prevent the inode from going away, either the caller must have a reference + * to the inode, or the inode must have I_WILL_FREE or I_FREEING set. */ static int writeback_single_inode(struct inode *inode, struct writeback_control *wbc) @@ -1544,23 +1552,23 @@ static int writeback_single_inode(struct inode *inode, WARN_ON(inode->i_state & I_WILL_FREE); if (inode->i_state & I_SYNC) { + /* + * Writeback is already running on the inode. For WB_SYNC_NONE, + * that's enough and we can just return. For WB_SYNC_ALL, we + * must wait for the existing writeback to complete, then do + * writeback again if there's anything left. + */ if (wbc->sync_mode != WB_SYNC_ALL) goto out; - /* - * It's a data-integrity sync. We must wait. Since callers hold - * inode reference or inode has I_WILL_FREE set, it cannot go - * away under us. - */ __inode_wait_for_writeback(inode); } WARN_ON(inode->i_state & I_SYNC); /* - * Skip inode if it is clean and we have no outstanding writeback in - * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this - * function since flusher thread may be doing for example sync in - * parallel and if we move the inode, it could get skipped. So here we - * make sure inode is on some writeback list and leave it there unless - * we have completely cleaned the inode. + * If the inode is already fully clean, then there's nothing to do. + * + * For data-integrity syncs we also need to check whether any pages are + * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If + * there are any such pages, we'll need to wait for them. */ if (!(inode->i_state & I_DIRTY_ALL) && (wbc->sync_mode != WB_SYNC_ALL || @@ -1576,8 +1584,9 @@ static int writeback_single_inode(struct inode *inode, wb = inode_to_wb_and_lock_list(inode); spin_lock(&inode->i_lock); /* - * If inode is clean, remove it from writeback lists. Otherwise don't - * touch it. See comment above for explanation. + * If the inode is now fully clean, then it can be safely removed from + * its writeback list (if any). Otherwise the flusher threads are + * responsible for the writeback lists. */ if (!(inode->i_state & I_DIRTY_ALL)) inode_io_list_del_locked(inode, wb); From 3aac630b5ce18b0a7e34cb35cdd99fcc508edc92 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:52 -0800 Subject: [PATCH 09/10] gfs2: don't worry about I_DIRTY_TIME in gfs2_fsync() The I_DIRTY_TIME flag is primary used within the VFS, and there's no reason for ->fsync() implementations to do anything with it. This is because when !datasync, the VFS will expire dirty timestamps before calling ->fsync(). (See vfs_fsync_range().) This turns I_DIRTY_TIME into I_DIRTY_SYNC. Therefore, change gfs2_fsync() to not check for I_DIRTY_TIME. Link: https://lore.kernel.org/r/20210112190253.64307-11-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/gfs2/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index b39b339feddc..7fe2497755a3 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -749,7 +749,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & I_DIRTY_ALL; + int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -762,7 +762,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) - sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME); + sync_state &= ~I_DIRTY_SYNC; if (sync_state) { ret = sync_inode_metadata(inode, 1); From ed296c6c05b0ac52d7c6bf13a90f02b8b8222169 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 12 Jan 2021 11:02:53 -0800 Subject: [PATCH 10/10] ext4: simplify i_state checks in __ext4_update_other_inode_time() Since I_DIRTY_TIME and I_DIRTY_INODE are mutually exclusive in i_state, there's no need to check for I_DIRTY_TIME && !I_DIRTY_INODE. Just check for I_DIRTY_TIME. Also introduce a helper function in include/linux/fs.h to do this check. Link: https://lore.kernel.org/r/20210112190253.64307-12-ebiggers@kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Eric Biggers Signed-off-by: Jan Kara --- fs/ext4/inode.c | 8 ++------ include/linux/fs.h | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4cc6c7834312..d809a06b6ef7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4961,15 +4961,11 @@ static void __ext4_update_other_inode_time(struct super_block *sb, if (!inode) return; - if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_INODE)) || - ((inode->i_state & I_DIRTY_TIME) == 0)) + if (!inode_is_dirtytime_only(inode)) return; spin_lock(&inode->i_lock); - if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | - I_DIRTY_INODE)) == 0) && - (inode->i_state & I_DIRTY_TIME)) { + if (inode_is_dirtytime_only(inode)) { struct ext4_inode_info *ei = EXT4_I(inode); inode->i_state &= ~I_DIRTY_TIME; diff --git a/include/linux/fs.h b/include/linux/fs.h index 45a0303b2aeb..de0c789104c2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2194,6 +2194,21 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +/* + * Returns true if the given inode itself only has dirty timestamps (its pages + * may still be dirty) and isn't currently being allocated or freed. + * Filesystems should call this if when writing an inode when lazytime is + * enabled, they want to opportunistically write the timestamps of other inodes + * located very nearby on-disk, e.g. in the same inode block. This returns true + * if the given inode is in need of such an opportunistic update. Requires + * i_lock, or at least later re-checking under i_lock. + */ +static inline bool inode_is_dirtytime_only(struct inode *inode) +{ + return (inode->i_state & (I_DIRTY_TIME | I_NEW | + I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME; +} + extern void inc_nlink(struct inode *inode); extern void drop_nlink(struct inode *inode); extern void clear_nlink(struct inode *inode);