btrfs: use rbtree with leftmost node cached for tracking lowest block group

We keep track of the start offset of the block group with the lowest start
offset at fs_info->first_logical_byte. This requires explicitly updating
that field every time we add, delete or lookup a block group to/from the
red black tree at fs_info->block_group_cache_tree.

Since the block group with the lowest start address happens to always be
the one that is the leftmost node of the tree, we can use a red black tree
that caches the left most node. Then when we need the start address of
that block group, we can just quickly get the leftmost node in the tree
and extract the start offset of that node's block group. This avoids the
need to explicitly keep track of that address in the dedicated member
fs_info->first_logical_byte, and it also allows the next patch in the
series to switch the lock that protects the red black tree from a spin
lock to a read/write lock - without this change it would be tricky
because block group searches also update fs_info->first_logical_byte.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Filipe Manana 2022-04-13 16:20:40 +01:00 committed by David Sterba
parent 0eb997bff0
commit 08dddb2951
6 changed files with 25 additions and 37 deletions

View File

@ -168,11 +168,12 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
bool leftmost = true;
ASSERT(block_group->length != 0);
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
p = &info->block_group_cache_tree.rb_root.rb_node;
while (*p) {
parent = *p;
@ -181,6 +182,7 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
p = &(*p)->rb_left;
} else if (block_group->start > cache->start) {
p = &(*p)->rb_right;
leftmost = false;
} else {
spin_unlock(&info->block_group_cache_lock);
return -EEXIST;
@ -188,11 +190,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
}
rb_link_node(&block_group->cache_node, parent, p);
rb_insert_color(&block_group->cache_node,
&info->block_group_cache_tree);
if (info->first_logical_byte > block_group->start)
info->first_logical_byte = block_group->start;
rb_insert_color_cached(&block_group->cache_node,
&info->block_group_cache_tree, leftmost);
spin_unlock(&info->block_group_cache_lock);
@ -211,7 +210,7 @@ static struct btrfs_block_group *block_group_cache_tree_search(
u64 end, start;
spin_lock(&info->block_group_cache_lock);
n = info->block_group_cache_tree.rb_node;
n = info->block_group_cache_tree.rb_root.rb_node;
while (n) {
cache = rb_entry(n, struct btrfs_block_group, cache_node);
@ -233,11 +232,8 @@ static struct btrfs_block_group *block_group_cache_tree_search(
break;
}
}
if (ret) {
if (ret)
btrfs_get_block_group(ret);
if (bytenr == 0 && info->first_logical_byte > ret->start)
info->first_logical_byte = ret->start;
}
spin_unlock(&info->block_group_cache_lock);
return ret;
@ -958,15 +954,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
spin_lock(&fs_info->block_group_cache_lock);
rb_erase(&block_group->cache_node,
&fs_info->block_group_cache_tree);
rb_erase_cached(&block_group->cache_node,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
/* Once for the block groups rbtree */
btrfs_put_block_group(block_group);
if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
down_write(&block_group->space_info->groups_sem);
@ -4014,11 +4008,11 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_unlock(&info->zone_active_bgs_lock);
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
while ((n = rb_last(&info->block_group_cache_tree.rb_root)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group,
cache_node);
rb_erase(&block_group->cache_node,
&info->block_group_cache_tree);
rb_erase_cached(&block_group->cache_node,
&info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
spin_unlock(&info->block_group_cache_lock);

View File

@ -680,8 +680,7 @@ struct btrfs_fs_info {
/* block group cache stuff */
spinlock_t block_group_cache_lock;
u64 first_logical_byte;
struct rb_root block_group_cache_tree;
struct rb_root_cached block_group_cache_tree;
/* keep track of unallocated space */
atomic64_t free_chunk_space;

View File

@ -3232,8 +3232,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_init_async_reclaim_work(fs_info);
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
fs_info->first_logical_byte = (u64)-1;
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
extent_io_tree_init(fs_info, &fs_info->excluded_extents,
IO_TREE_FS_EXCLUDED_EXTENTS, NULL);

View File

@ -2494,23 +2494,19 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
static u64 first_logical_byte(struct btrfs_fs_info *fs_info)
{
struct btrfs_block_group *cache;
u64 bytenr;
struct rb_node *leftmost;
u64 bytenr = 0;
spin_lock(&fs_info->block_group_cache_lock);
bytenr = fs_info->first_logical_byte;
spin_unlock(&fs_info->block_group_cache_lock);
if (bytenr < (u64)-1)
return bytenr;
/* Get the block group with the lowest logical start address. */
cache = btrfs_lookup_first_block_group(fs_info, 0);
if (!cache)
return 0;
leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
if (leftmost) {
struct btrfs_block_group *bg;
bytenr = cache->start;
btrfs_put_block_group(cache);
bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
bytenr = bg->start;
}
spin_unlock(&fs_info->block_group_cache_lock);
return bytenr;
}

View File

@ -4072,7 +4072,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
btrfs_info(fs_info, "cleaning free space cache v1");
node = rb_first(&fs_info->block_group_cache_tree);
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
block_group = rb_entry(node, struct btrfs_block_group, cache_node);
ret = btrfs_remove_free_space_inode(trans, NULL, block_group);

View File

@ -1178,7 +1178,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
goto abort;
}
node = rb_first(&fs_info->block_group_cache_tree);
node = rb_first_cached(&fs_info->block_group_cache_tree);
while (node) {
block_group = rb_entry(node, struct btrfs_block_group,
cache_node);