mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 12:11:40 +00:00
2c8f5e8cdf
We don't need to set the EXTENT_UPTODATE bit in an inode's io_tree to mark a
range as uptodate, we rely on the pages themselves being uptodate - page
reading is not triggered for already uptodate pages. Recently we removed
most uses of EXTENT_UPTODATE for buffered IO with commit 52b029f427
("btrfs: remove unnecessary EXTENT_UPTODATE state in buffered I/O path"),
but there were a few leftovers, namely when reading from holes and
successfully finishing read repair.
These leftovers unnecessarily make an inode's io_tree larger and deeper,
slowing down searches on it. So remove all the leftovers.
This change is part of a patchset that has the goal to make performance
better for applications that use lseek's SEEK_HOLE and SEEK_DATA modes to
iterate over the extents of a file. Two examples are the cp program from
coreutils 9.0+ and the tar program (when using its --sparse / -S option).
A sample test and results are listed in the changelog of the last patch
in the series:
1/9 btrfs: remove leftover setting of EXTENT_UPTODATE state in an inode's io_tree
2/9 btrfs: add an early exit when searching for delalloc range for lseek/fiemap
3/9 btrfs: skip unnecessary delalloc searches during lseek/fiemap
4/9 btrfs: search for delalloc more efficiently during lseek/fiemap
5/9 btrfs: remove no longer used btrfs_next_extent_map()
6/9 btrfs: allow passing a cached state record to count_range_bits()
7/9 btrfs: update stale comment for count_range_bits()
8/9 btrfs: use cached state when looking for delalloc ranges with fiemap
9/9 btrfs: use cached state when looking for delalloc ranges with lseek
Reported-by: Wang Yugui <wangyugui@e16-tech.com>
Link: https://lore.kernel.org/linux-btrfs/20221106073028.71F9.409509F4@e16-tech.com/
Link: https://lore.kernel.org/linux-btrfs/CAL3q7H5NSVicm7nYBJ7x8fFkDpno8z3PYt5aPU43Bajc1H0h1Q@mail.gmail.com/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
233 lines
7.2 KiB
C
233 lines
7.2 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef BTRFS_EXTENT_IO_TREE_H
|
|
#define BTRFS_EXTENT_IO_TREE_H
|
|
|
|
#include "misc.h"
|
|
|
|
/*
 * Forward declarations: this header only needs the type names.
 * struct extent_changeset is used purely through pointer parameters in the
 * prototypes below.
 */
struct extent_changeset;
struct io_failure_record;
|
|
|
|
/* Bits for the extent state */
|
|
enum {
|
|
ENUM_BIT(EXTENT_DIRTY),
|
|
ENUM_BIT(EXTENT_UPTODATE),
|
|
ENUM_BIT(EXTENT_LOCKED),
|
|
ENUM_BIT(EXTENT_NEW),
|
|
ENUM_BIT(EXTENT_DELALLOC),
|
|
ENUM_BIT(EXTENT_DEFRAG),
|
|
ENUM_BIT(EXTENT_BOUNDARY),
|
|
ENUM_BIT(EXTENT_NODATASUM),
|
|
ENUM_BIT(EXTENT_CLEAR_META_RESV),
|
|
ENUM_BIT(EXTENT_NEED_WAIT),
|
|
ENUM_BIT(EXTENT_NORESERVE),
|
|
ENUM_BIT(EXTENT_QGROUP_RESERVED),
|
|
ENUM_BIT(EXTENT_CLEAR_DATA_RESV),
|
|
/*
|
|
* Must be cleared only during ordered extent completion or on error
|
|
* paths if we did not manage to submit bios and create the ordered
|
|
* extents for the range. Should not be cleared during page release
|
|
* and page invalidation (if there is an ordered extent in flight),
|
|
* that is left for the ordered extent completion.
|
|
*/
|
|
ENUM_BIT(EXTENT_DELALLOC_NEW),
|
|
/*
|
|
* When an ordered extent successfully completes for a region marked as
|
|
* a new delalloc range, use this flag when clearing a new delalloc
|
|
* range to indicate that the VFS' inode number of bytes should be
|
|
* incremented and the inode's new delalloc bytes decremented, in an
|
|
* atomic way to prevent races with stat(2).
|
|
*/
|
|
ENUM_BIT(EXTENT_ADD_INODE_BYTES),
|
|
/*
|
|
* Set during truncate when we're clearing an entire range and we just
|
|
* want the extent states to go away.
|
|
*/
|
|
ENUM_BIT(EXTENT_CLEAR_ALL_BITS),
|
|
};
|
|
|
|
/* Mask combining the metadata and data reservation-clearing bits. */
#define EXTENT_DO_ACCOUNTING	(EXTENT_CLEAR_META_RESV | EXTENT_CLEAR_DATA_RESV)

/*
 * Mask made of EXTENT_DO_ACCOUNTING, EXTENT_ADD_INODE_BYTES and
 * EXTENT_CLEAR_ALL_BITS.
 */
#define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING |		\
				 EXTENT_ADD_INODE_BYTES |	\
				 EXTENT_CLEAR_ALL_BITS)
|
|
|
|
/*
 * Aliases of the bits above that are used only by the device allocation
 * tree.  They must not be mapped onto EXTENT_LOCKED, EXTENT_BOUNDARY,
 * EXTENT_CLEAR_META_RESV or EXTENT_CLEAR_DATA_RESV, because the bit
 * manipulation functions give those bits a special meaning.
 */
#define CHUNK_ALLOCATED		EXTENT_DIRTY
#define CHUNK_TRIMMED		EXTENT_DEFRAG
/* Both chunk state aliases combined. */
#define CHUNK_STATE_MASK	(CHUNK_ALLOCATED | CHUNK_TRIMMED)
|
|
|
|
/*
 * Owner identifiers for an extent_io_tree; extent_io_tree::owner is
 * documented as holding one of these IO_TREE_* values.  Enumerators take
 * the default consecutive values starting at 0, so their order matters.
 */
enum {
	IO_TREE_FS_PINNED_EXTENTS,
	IO_TREE_FS_EXCLUDED_EXTENTS,
	IO_TREE_BTREE_INODE_IO,
	IO_TREE_INODE_IO,
	IO_TREE_RELOC_BLOCKS,
	IO_TREE_TRANS_DIRTY_PAGES,
	IO_TREE_ROOT_DIRTY_LOG_PAGES,
	IO_TREE_INODE_FILE_EXTENT,
	IO_TREE_LOG_CSUM_RANGE,
	IO_TREE_SELFTEST,
	IO_TREE_DEVICE_ALLOC_STATE,
};
|
|
|
|
struct extent_io_tree {
|
|
struct rb_root state;
|
|
struct btrfs_fs_info *fs_info;
|
|
/* Inode associated with this tree, or NULL. */
|
|
struct btrfs_inode *inode;
|
|
|
|
/* Who owns this io tree, should be one of IO_TREE_* */
|
|
u8 owner;
|
|
|
|
spinlock_t lock;
|
|
};
|
|
|
|
struct extent_state {
|
|
u64 start;
|
|
u64 end; /* inclusive */
|
|
struct rb_node rb_node;
|
|
|
|
/* ADD NEW ELEMENTS AFTER THIS */
|
|
wait_queue_head_t wq;
|
|
refcount_t refs;
|
|
u32 state;
|
|
|
|
#ifdef CONFIG_BTRFS_DEBUG
|
|
struct list_head leak_list;
|
|
#endif
|
|
};
|
|
|
|
/*
 * Set-up and release entry points for an extent_io_tree (judging by their
 * names; both are defined outside this header).
 *
 * NOTE(review): @owner is presumably one of the IO_TREE_* values, matching
 * the comment on extent_io_tree::owner - confirm at the definition.
 */
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
			 struct extent_io_tree *tree,
			 unsigned int owner);
void extent_io_tree_release(struct extent_io_tree *tree);
|
|
|
|
/*
 * Range locking entry points, defined outside this header.
 *
 * NOTE(review): unlock_extent() below clears EXTENT_LOCKED, so these
 * presumably set that bit on the range from @start to @end; the meaning of
 * the return values is not visible here - confirm at the definitions.
 */
int lock_extent(struct extent_io_tree *tree,
		u64 start, u64 end,
		struct extent_state **cached);

int try_lock_extent(struct extent_io_tree *tree,
		    u64 start, u64 end,
		    struct extent_state **cached);
|
|
|
|
int __init extent_state_init_cachep(void);
|
|
void __cold extent_state_free_cachep(void);
|
|
|
|
/*
 * Defined outside this header.
 *
 * NOTE(review): @start is passed by pointer, so the callee may update it;
 * how @search_end, @max_bytes and @contig bound the count is not visible
 * here - confirm at the definition.
 */
u64 count_range_bits(struct extent_io_tree *tree, u64 *start,
		     u64 search_end, u64 max_bytes,
		     u32 bits, int contig);
|
|
|
|
void free_extent_state(struct extent_state *state);
|
|
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
|
u32 bits, int filled, struct extent_state *cached_state);
|
|
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
|
u32 bits, struct extent_changeset *changeset);
|
|
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
|
u32 bits, struct extent_state **cached, gfp_t mask,
|
|
struct extent_changeset *changeset);
|
|
|
|
/*
 * Convenience form of __clear_extent_bit(): @tree, @start, @end, @bits and
 * @cached are forwarded unchanged, the allocation mask is always GFP_NOFS
 * and no changeset is passed.
 *
 * Returns whatever __clear_extent_bit() returns.
 */
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
				   u64 end, u32 bits,
				   struct extent_state **cached)
{
	struct extent_changeset *const no_changeset = NULL;

	return __clear_extent_bit(tree, start, end, bits, cached, GFP_NOFS,
				  no_changeset);
}
|
|
|
|
/*
 * Calls __clear_extent_bit() with EXTENT_LOCKED as the only bit for the
 * range given by @start and @end, using GFP_NOFS and no changeset.  @cached
 * is handed through as is.
 *
 * Returns whatever __clear_extent_bit() returns.
 */
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
				struct extent_state **cached)
{
	const u32 lock_bit = EXTENT_LOCKED;
	int ret;

	ret = __clear_extent_bit(tree, start, end, lock_bit, cached,
				 GFP_NOFS, NULL);
	return ret;
}
|
|
|
|
/*
 * Same as clear_extent_bit() but for callers without a cached extent state:
 * NULL is passed in its place.
 *
 * Returns whatever clear_extent_bit() returns.
 */
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
				    u64 end, u32 bits)
{
	struct extent_state **const no_cache = NULL;

	return clear_extent_bit(tree, start, end, bits, no_cache);
}
|
|
|
|
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
|
u32 bits, struct extent_changeset *changeset);
|
|
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
|
u32 bits, struct extent_state **cached_state, gfp_t mask);
|
|
|
|
/*
 * Calls set_extent_bit() for @bits with GFP_NOWAIT as the allocation mask
 * and no cached extent state.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
					 u64 end, u32 bits)
{
	const gfp_t gfp = GFP_NOWAIT;

	return set_extent_bit(tree, start, end, bits, NULL, gfp);
}
|
|
|
|
/*
 * Calls set_extent_bit() for @bits with GFP_NOFS as the allocation mask and
 * no cached extent state.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
				  u64 end, u32 bits)
{
	const gfp_t gfp = GFP_NOFS;

	return set_extent_bit(tree, start, end, bits, NULL, gfp);
}
|
|
|
|
/*
 * Calls __clear_extent_bit() with EXTENT_UPTODATE as the only bit, using
 * GFP_NOFS and no changeset.  @cached_state is handed through as is.
 *
 * Returns whatever __clear_extent_bit() returns.
 */
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
				u64 end, struct extent_state **cached_state)
{
	const u32 uptodate_bit = EXTENT_UPTODATE;
	int ret;

	ret = __clear_extent_bit(tree, start, end, uptodate_bit, cached_state,
				 GFP_NOFS, NULL);
	return ret;
}
|
|
|
|
/*
 * Calls set_extent_bit() with EXTENT_DIRTY as the only bit and no cached
 * extent state.  Unlike the other set wrappers in this header, the caller
 * picks the allocation mask through @mask.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
				   u64 end, gfp_t mask)
{
	const u32 dirty_bit = EXTENT_DIRTY;

	return set_extent_bit(tree, start, end, dirty_bit, NULL, mask);
}
|
|
|
|
/*
 * Calls clear_extent_bit() with EXTENT_DIRTY, EXTENT_DELALLOC and the
 * EXTENT_DO_ACCOUNTING mask all at once.  @cached is handed through as is.
 *
 * Returns whatever clear_extent_bit() returns.
 */
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
				     u64 end, struct extent_state **cached)
{
	const u32 bits_to_clear = EXTENT_DIRTY | EXTENT_DELALLOC |
				  EXTENT_DO_ACCOUNTING;

	return clear_extent_bit(tree, start, end, bits_to_clear, cached);
}
|
|
|
|
/*
 * Defined outside this header.
 *
 * NOTE(review): takes two masks, @bits and @clear_bits; presumably the
 * former is set and the latter cleared on the range - confirm at the
 * definition.
 */
int convert_extent_bit(struct extent_io_tree *tree,
		       u64 start, u64 end,
		       u32 bits, u32 clear_bits,
		       struct extent_state **cached_state);
|
|
|
|
/*
 * Calls set_extent_bit() with EXTENT_DELALLOC ORed with the caller supplied
 * @extra_bits, using GFP_NOFS.  @cached_state is handed through as is.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
				      u64 end, u32 extra_bits,
				      struct extent_state **cached_state)
{
	const u32 bits_to_set = EXTENT_DELALLOC | extra_bits;

	return set_extent_bit(tree, start, end, bits_to_set, cached_state,
			      GFP_NOFS);
}
|
|
|
|
/*
 * Calls set_extent_bit() with EXTENT_DELALLOC and EXTENT_DEFRAG together,
 * using GFP_NOFS.  @cached_state is handed through as is.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
				    u64 end, struct extent_state **cached_state)
{
	const u32 bits_to_set = EXTENT_DELALLOC | EXTENT_DEFRAG;

	return set_extent_bit(tree, start, end, bits_to_set, cached_state,
			      GFP_NOFS);
}
|
|
|
|
/*
 * Calls set_extent_bit() with EXTENT_NEW as the only bit, using GFP_NOFS
 * and no cached extent state.
 *
 * Returns whatever set_extent_bit() returns.
 */
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
				 u64 end)
{
	const u32 new_bit = EXTENT_NEW;

	return set_extent_bit(tree, start, end, new_bit, NULL, GFP_NOFS);
}
|
|
|
|
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
|
u64 *start_ret, u64 *end_ret, u32 bits,
|
|
struct extent_state **cached_state);
|
|
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
|
u64 *start_ret, u64 *end_ret, u32 bits);
|
|
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
|
u64 *start_ret, u64 *end_ret, u32 bits);
|
|
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
|
|
u64 *end, u64 max_bytes,
|
|
struct extent_state **cached_state);
|
|
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits,
|
|
struct extent_state **cached_state);
|
|
|
|
#endif /* BTRFS_EXTENT_IO_TREE_H */
|