From 8f4d855839179f410fa910a26eb81d646d628f26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 May 2015 18:19:01 -0400 Subject: [PATCH 1/7] ext4: fix lazytime optimization We had a fencepost error in the lazytime optimization which means that timestamp would get written to the wrong inode. Cc: stable@vger.kernel.org Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 55b187c3bac1..0554b0b5957b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4345,7 +4345,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = orig_ino & ~(inodes_per_block - 1); + ino = (orig_ino & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; From 1b46617b8d0851c78b6440701d78fffbd9170741 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 May 2015 18:37:30 -0400 Subject: [PATCH 2/7] ext4: don't save the error information if the block device is read-only Google-Bug-Id: 20939131 Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f06d0589ddba..ca9d4a2fed41 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func, struct ext4_super_block *es = EXT4_SB(sb)->s_es; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); es->s_last_error_time = cpu_to_le32(get_seconds()); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); From 92c82639106214a9d34daa2bc791605eb1faab07 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 May 2015 18:43:36 -0400 Subject: [PATCH 3/7] ext4: remove unused function prototype from ext4.h The ext4_extent_tree_init() function hasn't been in the ext4 code for a long time ago, except in an unused function prototype in ext4.h Google-Bug-Id: 4530137 Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 009a0590b20f..9a83f149ac85 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2889,7 +2889,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblocks); -extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path); From 9d506594069355d1fb2de3f9104667312ff08ed3 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 14 May 2015 18:55:18 -0400 Subject: [PATCH 4/7] ext4: fix NULL pointer dereference when journal restart fails Currently when journal restart fails, we'll have the h_transaction of the handle set to NULL to indicate that the handle has been effectively aborted. We handle this situation quietly in the jbd2_journal_stop() and just free the handle and exit because everything else has been done before we attempted (and failed) to restart the journal. Unfortunately there are a number of problems with that approach introduced with commit 41a5b913197c "jbd2: invalidate handle if jbd2_journal_restart() fails" First of all in ext4 jbd2_journal_stop() will be called through __ext4_journal_stop() where we would try to get a hold of the superblock by dereferencing h_transaction which in this case would lead to NULL pointer dereference and crash. In addition we're going to free the handle regardless of the refcount which is bad as well, because others up the call chain will still reference the handle so we might potentially reference already freed memory. Moreover it's expected that we'll get aborted handle as well as detached handle in some of the journalling function as the error propagates up the stack, so it's unnecessary to call WARN_ON every time we get detached handle. And finally we might leak some memory by forgetting to free reserved handle in jbd2_journal_stop() in the case where handle was detached from the transaction (h_transaction is NULL). Fix the NULL pointer dereference in __ext4_journal_stop() by just calling jbd2_journal_stop() quietly as suggested by Jan Kara. Also fix the potential memory leak in jbd2_journal_stop() and use proper handle refcounting before we attempt to free it to avoid use-after-free issues. And finally remove all WARN_ON(!transaction) from the code so that we do not get random traces when something goes wrong because when journal restart fails we will get to some of those functions. Cc: stable@vger.kernel.org Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/ext4_jbd2.c | 6 ++++++ fs/jbd2/transaction.c | 25 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 3445035c7e01..d41843181818 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) ext4_put_nojournal(handle); return 0; } + + if (!handle->h_transaction) { + err = jbd2_journal_stop(handle); + return handle->h_err ? handle->h_err : err; + } + sb = handle->h_transaction->t_journal->j_private; err = handle->h_err; rc = jbd2_journal_stop(handle); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5f09370c90a8..ff2f2e6ad311 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) int result; int wanted; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) tid_t tid; int need_to_start, ret; - WARN_ON(!transaction); /* If we've had an abort of any type, don't even think about * actually doing the restart! */ if (is_handle_aborted(handle)) @@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int need_copy = 0; unsigned long start_lock, time_lock; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) int err; jbd_debug(5, "journal_head %p\n", jh); - WARN_ON(!transaction); err = -EROFS; if (is_handle_aborted(handle)) goto out; @@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int ret = 0; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) int err = 0; int was_modified = 0; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle) tid_t tid; pid_t pid; - if (!transaction) - goto free_and_exit; + if (!transaction) { + /* + * Handle is already detached from the transaction so + * there is nothing to do other than decrease a refcount, + * or free the handle if refcount drops to zero + */ + if (--handle->h_ref > 0) { + jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, + handle->h_ref); + return err; + } else { + if (handle->h_rsv_handle) + jbd2_free_handle(handle->h_rsv_handle); + goto free_and_exit; + } + } journal = transaction->t_journal; J_ASSERT(journal_current_handle() == handle); @@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) transaction_t *transaction = handle->h_transaction; journal_t *journal; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; From 2f974865ffdfe7b9f46a9940836c8b167342563d Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 14 May 2015 19:00:45 -0400 Subject: [PATCH 5/7] ext4: check for zero length extent explicitly The following commit introduced a bug when checking for zero length extent 5946d08 ext4: check for overlapping extents in ext4_valid_extent_entries() Zero length extent could pass the check if lblock is zero. Adding the explicit check for zero length back. Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d74e08029643..451b92a9958a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); ext4_lblk_t last = lblock + len - 1; - if (lblock > last) + if (len == 0 || lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } From e531d0bceb402e643a4499de40dd3fa39d8d2e43 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 14 May 2015 19:11:50 -0400 Subject: [PATCH 6/7] jbd2: fix r_count overflows leading to buffer overflow in journal recovery The journal revoke block recovery code does not check r_count for sanity, which means that an evil value of r_count could result in the kernel reading off the end of the revoke table and into whatever garbage lies beyond. This could crash the kernel, so fix that. However, in testing this fix, I discovered that the code to write out the revoke tables also was not correctly checking to see if the block was full -- the current offset check is fine so long as the revoke table space size is a multiple of the record size, but this is not true when either journal_csum_v[23] are set. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/jbd2/recovery.c | 10 +++++++++- fs/jbd2/revoke.c | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index b5128c6e63ad..a9079d035ae5 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, { jbd2_journal_revoke_header_t *header; int offset, max; + int csum_size = 0; + __u32 rcount; int record_len = 4; header = (jbd2_journal_revoke_header_t *) bh->b_data; offset = sizeof(jbd2_journal_revoke_header_t); - max = be32_to_cpu(header->r_count); + rcount = be32_to_cpu(header->r_count); if (!jbd2_revoke_block_csum_verify(journal, header)) return -EINVAL; + if (jbd2_journal_has_csum_v2or3(journal)) + csum_size = sizeof(struct jbd2_journal_revoke_tail); + if (rcount > journal->j_blocksize - csum_size) + return -EINVAL; + max = rcount; + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) record_len = 8; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index c6cbaef2bda1..14214da80eb8 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal, { int csum_size = 0; struct buffer_head *descriptor; - int offset; + int sz, offset; journal_header_t *header; /* If we are already aborting, this all becomes a noop. We @@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal, if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + sz = 8; + else + sz = 4; + /* Make sure we have a descriptor with space left for the record */ if (descriptor) { - if (offset >= journal->j_blocksize - csum_size) { + if (offset + sz > journal->j_blocksize - csum_size) { flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } @@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); - offset += 8; - - } else { + else * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); - offset += 4; - } + offset += sz; *offsetp = offset; } From b9576fc3624eb9fc88bec0d0ae883fd78be86239 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 15 May 2015 00:24:10 -0400 Subject: [PATCH 7/7] ext4: fix an ext3 collapse range regression in xfstests The xfstests test suite assumes that an attempt to collapse range on the range (0, 1) will return EOPNOTSUPP if the file system does not support collapse range. Commit 280227a75b56: "ext4: move check under lock scope to close a race" broke this, and this caused xfstests to fail when run when testing file systems that did not have the extents feature enabled. Reported-by: Eric Whitney Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 451b92a9958a..e003a1e81dc3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5396,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) loff_t new_size, ioffset; int ret; + /* + * We need to test this early because xfstests assumes that a + * collapse range of (0, 1) will return EOPNOTSUPP if the file + * system does not support collapse range. + */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + return -EOPNOTSUPP; + /* Collapse range works only on fs block size aligned offsets. */ if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || len & (EXT4_CLUSTER_SIZE(sb) - 1))