jbd2: ensure abort the journal if detect IO error when writing original buffer back

Although we merged c044f3d836 ("jbd2: abort journal if free a async write error metadata buffer"), there is a race between jbd2_journal_try_to_free_buffers() and jbd2_journal_destroy(), so the jbd2_log_do_checkpoint() may still fail to detect the buffer write io error flag which may lead to filesystem inconsistency. jbd2_journal_try_to_free_buffers() ext4_put_super() jbd2_journal_destroy() __jbd2_journal_remove_checkpoint() detect buffer write error jbd2_log_do_checkpoint() jbd2_cleanup_journal_tail() <--- lead to inconsistency jbd2_journal_abort() Fix this issue by introducing a new atomic flag which only have one JBD2_CHECKPOINT_IO_ERROR bit now, and set it in __jbd2_journal_remove_checkpoint() when freeing a checkpoint buffer which has write_io_error flag. Then jbd2_journal_destroy() will detect this mark and abort the journal to prevent updating log tail. Signed-off-by: Zhang Yi <yi.zhang@huawei.com> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20210610112440.3438139-3-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2024-11-22 20:22:09 +00:00 · 2021-06-10 19:24:34 +08:00 · 2021-06-10 19:24:34 +08:00 · fcf37549ae
commit fcf37549ae
parent 1866cba842
3 changed files with 37 additions and 0 deletions
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@ -564,6 +564,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	struct transaction_chp_stats_s *stats;
 	transaction_t *transaction;
 	journal_t *journal;
+	struct buffer_head *bh = jh2bh(jh);

 	JBUFFER_TRACE(jh, "entry");

@ -575,6 +576,17 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;

 	JBUFFER_TRACE(jh, "removing from transaction");
+
+	/*
+	 * If we have failed to write the buffer out to disk, the filesystem
+	 * may become inconsistent. We cannot abort the journal here since
+	 * we hold j_list_lock and we have to be careful about races with
+	 * jbd2_journal_destroy(). So mark the writeback IO error in the
+	 * journal here and we abort the journal later from a better context.
+	 */
+	if (buffer_write_io_error(bh))
+		set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags);
+
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
 	jbd2_journal_put_journal_head(jh);
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@ -1610,6 +1610,10 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,

 	if (is_journal_aborted(journal))
 		return -EIO;
+	if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
+		jbd2_journal_abort(journal, -EIO);
+		return -EIO;
+	}

 	BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
 	jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
@ -2091,6 +2095,16 @@ int jbd2_journal_destroy(journal_t *journal)
 	J_ASSERT(journal->j_checkpoint_transactions == NULL);
 	spin_unlock(&journal->j_list_lock);

+	/*
+	 * OK, all checkpoint transactions have been checked, now check the
+	 * write out io error flag and abort the journal if some buffer failed
+	 * to write back to the original location, otherwise the filesystem
+	 * may become inconsistent.
+	 */
+	if (!is_journal_aborted(journal) &&
+	    test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
+		jbd2_journal_abort(journal, -EIO);
+
 	if (journal->j_sb_buffer) {
 		if (!is_journal_aborted(journal)) {
 			mutex_lock_io(&journal->j_checkpoint_mutex);
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@ -779,6 +779,11 @@ struct journal_s
 	 */
 	unsigned long		j_flags;

+	/**
+	 * @j_atomic_flags: Atomic journaling state flags.
+	 */
+	unsigned long		j_atomic_flags;
+
 	/**
 	 * @j_errno:
 	 *
@ -1375,6 +1380,12 @@ JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
 #define JBD2_JOURNAL_FLUSH_VALID	(JBD2_JOURNAL_FLUSH_DISCARD | \
 					JBD2_JOURNAL_FLUSH_ZEROOUT)

+/*
+ * Journal atomic flag definitions
+ */
+#define JBD2_CHECKPOINT_IO_ERROR	0x001	/* Detect io error while writing
+						 * buffer back to disk */
+
 /*
 * Function declarations for the journaling transaction and buffer
 * management