btrfs: introduce per-inode file extent tree

In order to keep track of where we have file extents on disk, and thus
where it is safe to adjust the i_size to, we need to have a tree in
place to keep track of the contiguous areas we have file extents for.

Add helpers to use this tree, as it's not required for NO_HOLES file
systems.  We will use this by setting DIRTY for the areas where we know
we have file extent items set, and clearing it when we remove file extent
items for truncation.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Josef Bacik 2020-01-17 09:02:21 -05:00 committed by David Sterba
parent 790a1d44f9
commit 41a2ee75aa
7 changed files with 159 additions and 0 deletions

View File

@ -60,6 +60,12 @@ struct btrfs_inode {
*/ */
struct extent_io_tree io_failure_tree; struct extent_io_tree io_failure_tree;
/*
* Keep track of where the inode has extent items mapped in order to
* make sure the i_size adjustments are accurate
*/
struct extent_io_tree file_extent_tree;
/* held while logging the inode in tree-log.c */ /* held while logging the inode in tree-log.c */
struct mutex log_mutex; struct mutex log_mutex;

View File

@ -2859,6 +2859,11 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct btrfs_file_extent_item *fi, struct btrfs_file_extent_item *fi,
const bool new_inline, const bool new_inline,
struct extent_map *em); struct extent_map *em);
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
u64 len);
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size);
/* inode.c */ /* inode.c */
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode, struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,

View File

@ -43,6 +43,7 @@ enum {
IO_TREE_RELOC_BLOCKS, IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES, IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES, IO_TREE_ROOT_DIRTY_LOG_PAGES,
IO_TREE_INODE_FILE_EXTENT,
IO_TREE_SELFTEST, IO_TREE_SELFTEST,
}; };
@ -222,6 +223,8 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
struct extent_state **cached_state); struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits); u64 *start_ret, u64 *end_ret, unsigned bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, unsigned bits);
int extent_invalidatepage(struct extent_io_tree *tree, int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset); struct page *page, unsigned long offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start, bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,

View File

@ -257,6 +257,15 @@ void __cold extent_io_exit(void)
bioset_exit(&btrfs_bioset); bioset_exit(&btrfs_bioset);
} }
/*
* For the file_extent_tree, we want to hold the inode lock when we look up and
* update the disk_i_size, but lockdep will complain because for our io_tree we
* hold the tree lock and then get the inode lock when setting delalloc. These
* two things are unrelated, so make a separate lock class for the
* file_extent_tree so the two locking patterns don't get mixed up.
*/
static struct lock_class_key file_extent_tree_class;
void extent_io_tree_init(struct btrfs_fs_info *fs_info, void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner, struct extent_io_tree *tree, unsigned int owner,
void *private_data) void *private_data)
@ -268,6 +277,8 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
spin_lock_init(&tree->lock); spin_lock_init(&tree->lock);
tree->private_data = private_data; tree->private_data = private_data;
tree->owner = owner; tree->owner = owner;
if (owner == IO_TREE_INODE_FILE_EXTENT)
lockdep_set_class(&tree->lock, &file_extent_tree_class);
} }
void extent_io_tree_release(struct extent_io_tree *tree) void extent_io_tree_release(struct extent_io_tree *tree)
@ -1567,6 +1578,43 @@ out:
return ret; return ret;
} }
/**
 * find_contiguous_extent_bit: find a contiguous area of bits
 * @tree - io tree to check
 * @start - offset to start the search from
 * @start_ret - set to the start offset of the first state found with @bits
 * @end_ret - set to the end offset of the last state in the contiguous run
 * @bits - bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again.  During this time they
 * will drop the tree->lock, so a single state is not guaranteed to describe
 * the whole contiguous area.  This helper looks up the first state with @bits
 * and then keeps walking to the following states for as long as each one
 * starts no later than one byte past the end of the range collected so far.
 *
 * NOTE: the states that follow the first one are folded into the range based
 * on adjacency only; @bits is not re-checked for them.
 *
 * The whole walk is done under tree->lock.
 *
 * Returns 0 when a range was found and stored in @start_ret / @end_ret, and 1
 * when no state with @bits was found, in which case the outputs are not
 * written.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, unsigned bits)
{
	struct extent_state *cur;
	u64 range_end;

	spin_lock(&tree->lock);

	cur = find_first_extent_bit_state(tree, start, bits);
	if (!cur) {
		spin_unlock(&tree->lock);
		return 1;
	}

	*start_ret = cur->start;
	range_end = cur->end;

	/* Extend the range while the next state begins right where we end. */
	for (cur = next_state(cur); cur; cur = next_state(cur)) {
		if (cur->start > range_end + 1)
			break;
		range_end = cur->end;
	}
	*end_ret = range_end;

	spin_unlock(&tree->lock);
	return 0;
}
/** /**
* find_first_clear_extent_bit - find the first range that has @bits not set. * find_first_clear_extent_bit - find the first range that has @bits not set.
* This range could start before @start. * This range could start before @start.

View File

@ -23,6 +23,97 @@
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \ #define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
PAGE_SIZE)) PAGE_SIZE))
/**
 * btrfs_inode_safe_disk_i_size_write: update disk_i_size to a safe value
 * @inode - the inode we want to update the disk_i_size for
 * @new_i_size - the i_size we want to set to, 0 if we use i_size
 *
 * With NO_HOLES set this simply sets the disk_i_size to whatever i_size_read()
 * returns (or to @new_i_size when it is non-zero), as it is perfectly fine
 * with a file that has holes without hole file extent items.
 *
 * However without NO_HOLES we need to only return the area that is contiguous
 * from the 0 offset of the file.  Otherwise we could end up adjusting i_size
 * up to an extent that has a gap in between.  If the file_extent_tree has no
 * EXTENT_DIRTY range starting at offset 0, disk_i_size is set to 0.
 *
 * Finally new_i_size should only be set in the case of truncate where we're not
 * ready to use i_size_read() as the limiter yet.
 *
 * The i_size sample and every store to disk_i_size are done under the btrfs
 * inode's spinlock, so both the NO_HOLES and the non-NO_HOLES paths are
 * serialized against each other.
 */
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_fs_info *fs_info = binode->root->fs_info;
	u64 start, end, i_size;
	int ret;

	spin_lock(&binode->lock);

	/* Sample i_size under the lock so it cannot go stale before the store. */
	i_size = new_i_size ?: i_size_read(inode);
	if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
		binode->disk_i_size = i_size;
		goto out_unlock;
	}

	ret = find_contiguous_extent_bit(&binode->file_extent_tree, 0,
					 &start, &end, EXTENT_DIRTY);
	if (!ret && start == 0)
		i_size = min(i_size, end + 1);
	else
		i_size = 0;
	binode->disk_i_size = i_size;

out_unlock:
	spin_unlock(&binode->lock);
}
/**
 * btrfs_inode_set_file_extent_range: mark a range as having file extent items
 * @inode - the inode we're modifying
 * @start - the start file offset of the file extent we've inserted
 * @len - the logical length of the file extent item
 *
 * Call this when inserting a new file extent where there was none before.  It
 * is not needed when an existing file extent is merely being replaced, but
 * calling it more than once for the same range is harmless.
 *
 * The start and len must match the file extent item, so they must be
 * sectorsize aligned (only the end of the range, start + len, is asserted).
 *
 * A zero @len is a no-op, and on NO_HOLES file systems the tree is not used,
 * so nothing is recorded.  Both cases return 0; otherwise the result of
 * set_extent_bits() is returned.
 */
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
				      u64 len)
{
	struct btrfs_fs_info *fs_info;
	u64 last;

	if (!len)
		return 0;

	fs_info = inode->root->fs_info;
	ASSERT(IS_ALIGNED(start + len, fs_info->sectorsize));

	if (btrfs_fs_incompat(fs_info, NO_HOLES))
		return 0;

	/* The io tree API takes an inclusive end offset. */
	last = start + len - 1;
	return set_extent_bits(&inode->file_extent_tree, start, last,
			       EXTENT_DIRTY);
}
/**
 * btrfs_inode_clear_file_extent_range: forget file extent items for a range
 * @inode - the inode we're modifying
 * @start - the start file offset of the file extent we're dropping
 * @len - the logical length of the file extent item, or (u64)-1 to clear
 *        everything from @start to the end of the tree
 *
 * Called when we drop a file extent, for example when we truncate.  Doesn't
 * need to be called for cases where we're replacing a file extent, like when
 * we've COWed a file extent.
 *
 * The start and len must match the file extent item, so thus must be sectorsize
 * aligned, except for the (u64)-1 "to the end" length.
 *
 * A zero @len is a no-op, and on NO_HOLES file systems the tree is not used.
 * Both cases return 0; otherwise the result of clear_extent_bit() is returned.
 */
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
					u64 len)
{
	u64 end;

	if (len == 0)
		return 0;

	ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
	       len == (u64)-1);

	if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
		return 0;

	/*
	 * With the (u64)-1 sentinel, start + len - 1 wraps around: it ends up
	 * below @start for any non-zero @start and one byte short of the
	 * maximum offset for @start == 0.  Clamp to the maximum offset instead.
	 */
	if (len == (u64)-1)
		end = (u64)-1;
	else
		end = start + len - 1;

	return clear_extent_bit(&inode->file_extent_tree, start, end,
				EXTENT_DIRTY, 0, 0, NULL);
}
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info, static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
u16 csum_size) u16 csum_size)
{ {

View File

@ -3187,6 +3187,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
i_uid_write(inode, btrfs_inode_uid(leaf, inode_item)); i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item)); i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item)); btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
round_up(i_size_read(inode), fs_info->sectorsize));
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime); inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime); inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
@ -8776,6 +8778,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode); extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
extent_io_tree_init(fs_info, &ei->io_failure_tree, extent_io_tree_init(fs_info, &ei->io_failure_tree,
IO_TREE_INODE_IO_FAILURE, inode); IO_TREE_INODE_IO_FAILURE, inode);
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT, inode);
ei->io_tree.track_uptodate = true; ei->io_tree.track_uptodate = true;
ei->io_failure_tree.track_uptodate = true; ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0); atomic_set(&ei->sync_writers, 0);
@ -8842,6 +8846,7 @@ void btrfs_destroy_inode(struct inode *inode)
btrfs_qgroup_check_reserved_leak(inode); btrfs_qgroup_check_reserved_leak(inode);
inode_tree_del(inode); inode_tree_del(inode);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0); btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
} }
int btrfs_drop_inode(struct inode *inode) int btrfs_drop_inode(struct inode *inode)

View File

@ -88,6 +88,7 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
{ IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \
{ IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \ { IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
{ IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \ { IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
{ IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
{ IO_TREE_SELFTEST, "SELFTEST" }) { IO_TREE_SELFTEST, "SELFTEST" })
#define BTRFS_GROUP_FLAGS \ #define BTRFS_GROUP_FLAGS \