forked from Minki/linux
btrfs: reset last_reflink_trans after fsyncing inode
When an inode has a last_reflink_trans matching the current transaction, we have to take special care when logging its checksums in order to avoid getting checksum items with overlapping ranges in a log tree, which could result in missing checksums after log replay (more on that in the changelogs of commit 40e046acbd
("Btrfs: fix missing data checksums after replaying a log tree") and commit e289f03ea7
("btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents")). We also need to make sure a full fsync will copy all old file extent items it finds in modified leaves, because they might have been copied from some other inode. However once we fsync an inode, we don't need to keep paying the price of that extra special care in future fsyncs done in the same transaction, unless the inode is used for another reflink operation or the full sync flag is set on it (truncate, failure to allocate extent maps for holes, and other exceptional and infrequent cases). So after we fsync an inode, reset its last_reflink_trans to zero. In case another reflink happens, we continue to update the last_reflink_trans of the inode, just as before. Also set last_reflink_trans to the generation of the last transaction that modified the inode whenever we need to set the full sync flag on the inode, just like when we need to load an inode from disk after eviction. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
96acb3753e
commit
23e3337faf
@ -341,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Should be called while holding the inode's VFS lock in exclusive mode or in a
|
||||
* context where no one else can access the inode concurrently (during inode
|
||||
* creation or when loading an inode from disk).
|
||||
*/
|
||||
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
|
||||
{
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
/*
|
||||
* The inode may have been part of a reflink operation in the last
|
||||
* transaction that modified it, and then a fsync has reset the
|
||||
* last_reflink_trans to avoid subsequent fsyncs in the same
|
||||
* transaction to do unnecessary work. So update last_reflink_trans
|
||||
* to the last_trans value (we have to be pessimistic and assume a
|
||||
* reflink happened).
|
||||
*
|
||||
* The ->last_trans is protected by the inode's spinlock and we can
|
||||
* have a concurrent ordered extent completion update it. Also set
|
||||
* last_reflink_trans to ->last_trans only if the former is less than
|
||||
* the later, because we can be called in a context where
|
||||
* last_reflink_trans was set to the current transaction generation
|
||||
* while ->last_trans was not yet updated in the current transaction,
|
||||
* and therefore has a lower value.
|
||||
*/
|
||||
spin_lock(&inode->lock);
|
||||
if (inode->last_reflink_trans < inode->last_trans)
|
||||
inode->last_reflink_trans = inode->last_trans;
|
||||
spin_unlock(&inode->lock);
|
||||
}
|
||||
|
||||
static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
|
||||
{
|
||||
bool ret = false;
|
||||
|
@ -2514,7 +2514,7 @@ out:
|
||||
hole_em = alloc_extent_map();
|
||||
if (!hole_em) {
|
||||
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
} else {
|
||||
hole_em->start = offset;
|
||||
hole_em->len = end - offset;
|
||||
@ -2535,8 +2535,7 @@ out:
|
||||
} while (ret == -EEXIST);
|
||||
free_extent_map(hole_em);
|
||||
if (ret)
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -2890,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
|
||||
* maps for the replacement extents (or holes).
|
||||
*/
|
||||
if (extent_info && !extent_info->is_new_extent)
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
|
||||
if (ret)
|
||||
goto out_trans;
|
||||
|
@ -418,7 +418,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
out:
|
||||
/*
|
||||
* Don't forget to free the reserved space, as for inlined extent
|
||||
@ -4911,8 +4911,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
|
||||
cur_offset + hole_size - 1, 0);
|
||||
hole_em = alloc_extent_map();
|
||||
if (!hole_em) {
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&inode->runtime_flags);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
goto next;
|
||||
}
|
||||
hole_em->start = cur_offset;
|
||||
@ -6165,7 +6164,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
|
||||
* sync since it will be a full sync anyway and this will blow away the
|
||||
* old info in the log.
|
||||
*/
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(inode));
|
||||
|
||||
key[0].objectid = objectid;
|
||||
key[0].type = BTRFS_INODE_ITEM_KEY;
|
||||
@ -8767,7 +8766,7 @@ out:
|
||||
* extents beyond i_size to drop.
|
||||
*/
|
||||
if (control.extents_found > 0)
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(inode));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -9975,8 +9974,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(inode));
|
||||
goto next;
|
||||
}
|
||||
|
||||
|
@ -277,7 +277,7 @@ copy_inline_extent:
|
||||
path->slots[0]),
|
||||
size);
|
||||
btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(dst));
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
|
||||
out:
|
||||
if (!ret && !trans) {
|
||||
@ -580,8 +580,7 @@ process_slot:
|
||||
* replaced file extent items.
|
||||
*/
|
||||
if (last_dest_end >= i_size_read(inode))
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_set_inode_full_sync(BTRFS_I(inode));
|
||||
|
||||
ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
|
||||
last_dest_end, destoff + len - 1, NULL, &trans);
|
||||
|
@ -6013,6 +6013,14 @@ log_extents:
|
||||
if (inode_only != LOG_INODE_EXISTS)
|
||||
inode->last_log_commit = inode->last_sub_trans;
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
/*
|
||||
* Reset the last_reflink_trans so that the next fsync does not need to
|
||||
* go through the slower path when logging extents and their checksums.
|
||||
*/
|
||||
if (inode_only == LOG_INODE_ALL)
|
||||
inode->last_reflink_trans = 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&inode->log_mutex);
|
||||
out:
|
||||
|
Loading…
Reference in New Issue
Block a user