btrfs: introduce end_bio_subpage_eb_writepage() function
The new function, end_bio_subpage_eb_writepage(), will handle the metadata writeback endio. The major differences involved are: - How to grab extent buffer Now page::private is a pointer to btrfs_subpage, we can no longer grab extent buffer directly. Thus we need to use the bv_offset to locate the extent buffer manually and iterate through the whole range. - Use btrfs_subpage_end_writeback() caller This helper will handle the subpage writeback for us. Since this function is executed under endio context, when grabbing extent buffers it can't grab eb->refs_lock as that lock is not designed to be grabbed under hardirq context. So here introduce a helper, find_extent_buffer_nolock(), for such situation, and convert find_extent_buffer() to use that helper. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
fb686c6824
commit
2f3186d8ee
@ -4080,13 +4080,98 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The endio specific version which won't touch any unsafe spinlock in endio
|
||||
* context.
|
||||
*/
|
||||
static struct extent_buffer *find_extent_buffer_nolock(
|
||||
struct btrfs_fs_info *fs_info, u64 start)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
rcu_read_lock();
|
||||
eb = radix_tree_lookup(&fs_info->buffer_radix,
|
||||
start >> fs_info->sectorsize_bits);
|
||||
if (eb && atomic_inc_not_zero(&eb->refs)) {
|
||||
rcu_read_unlock();
|
||||
return eb;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The endio function for subpage extent buffer write.
|
||||
*
|
||||
* Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
|
||||
* after all extent buffers in the page has finished their writeback.
|
||||
*/
|
||||
static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
|
||||
struct bio *bio)
|
||||
{
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, bio, iter_all) {
|
||||
struct page *page = bvec->bv_page;
|
||||
u64 bvec_start = page_offset(page) + bvec->bv_offset;
|
||||
u64 bvec_end = bvec_start + bvec->bv_len - 1;
|
||||
u64 cur_bytenr = bvec_start;
|
||||
|
||||
ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
|
||||
|
||||
/* Iterate through all extent buffers in the range */
|
||||
while (cur_bytenr <= bvec_end) {
|
||||
struct extent_buffer *eb;
|
||||
int done;
|
||||
|
||||
/*
|
||||
* Here we can't use find_extent_buffer(), as it may
|
||||
* try to lock eb->refs_lock, which is not safe in endio
|
||||
* context.
|
||||
*/
|
||||
eb = find_extent_buffer_nolock(fs_info, cur_bytenr);
|
||||
ASSERT(eb);
|
||||
|
||||
cur_bytenr = eb->start + eb->len;
|
||||
|
||||
ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
|
||||
done = atomic_dec_and_test(&eb->io_pages);
|
||||
ASSERT(done);
|
||||
|
||||
if (bio->bi_status ||
|
||||
test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
|
||||
ClearPageUptodate(page);
|
||||
set_btree_ioerr(page, eb);
|
||||
}
|
||||
|
||||
btrfs_subpage_clear_writeback(fs_info, page, eb->start,
|
||||
eb->len);
|
||||
end_extent_buffer_writeback(eb);
|
||||
/*
|
||||
* free_extent_buffer() will grab spinlock which is not
|
||||
* safe in endio context. Thus here we manually dec
|
||||
* the ref.
|
||||
*/
|
||||
atomic_dec(&eb->refs);
|
||||
}
|
||||
}
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
static void end_bio_extent_buffer_writepage(struct bio *bio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct bio_vec *bvec;
|
||||
struct extent_buffer *eb;
|
||||
int done;
|
||||
struct bvec_iter_all iter_all;
|
||||
|
||||
fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
|
||||
if (fs_info->sectorsize < PAGE_SIZE)
|
||||
return end_bio_subpage_eb_writepage(fs_info, bio);
|
||||
|
||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||
bio_for_each_segment_all(bvec, bio, iter_all) {
|
||||
struct page *page = bvec->bv_page;
|
||||
@ -5465,36 +5550,28 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
rcu_read_lock();
|
||||
eb = radix_tree_lookup(&fs_info->buffer_radix,
|
||||
start >> fs_info->sectorsize_bits);
|
||||
if (eb && atomic_inc_not_zero(&eb->refs)) {
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* Lock our eb's refs_lock to avoid races with
|
||||
* free_extent_buffer. When we get our eb it might be flagged
|
||||
* with EXTENT_BUFFER_STALE and another task running
|
||||
* free_extent_buffer might have seen that flag set,
|
||||
* eb->refs == 2, that the buffer isn't under IO (dirty and
|
||||
* writeback flags not set) and it's still in the tree (flag
|
||||
* EXTENT_BUFFER_TREE_REF set), therefore being in the process
|
||||
* of decrementing the extent buffer's reference count twice.
|
||||
* So here we could race and increment the eb's reference count,
|
||||
* clear its stale flag, mark it as dirty and drop our reference
|
||||
* before the other task finishes executing free_extent_buffer,
|
||||
* which would later result in an attempt to free an extent
|
||||
* buffer that is dirty.
|
||||
*/
|
||||
if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
|
||||
spin_lock(&eb->refs_lock);
|
||||
spin_unlock(&eb->refs_lock);
|
||||
}
|
||||
mark_extent_buffer_accessed(eb, NULL);
|
||||
return eb;
|
||||
eb = find_extent_buffer_nolock(fs_info, start);
|
||||
if (!eb)
|
||||
return NULL;
|
||||
/*
|
||||
* Lock our eb's refs_lock to avoid races with free_extent_buffer().
|
||||
* When we get our eb it might be flagged with EXTENT_BUFFER_STALE and
|
||||
* another task running free_extent_buffer() might have seen that flag
|
||||
* set, eb->refs == 2, that the buffer isn't under IO (dirty and
|
||||
* writeback flags not set) and it's still in the tree (flag
|
||||
* EXTENT_BUFFER_TREE_REF set), therefore being in the process of
|
||||
* decrementing the extent buffer's reference count twice. So here we
|
||||
* could race and increment the eb's reference count, clear its stale
|
||||
* flag, mark it as dirty and drop our reference before the other task
|
||||
* finishes executing free_extent_buffer, which would later result in
|
||||
* an attempt to free an extent buffer that is dirty.
|
||||
*/
|
||||
if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
|
||||
spin_lock(&eb->refs_lock);
|
||||
spin_unlock(&eb->refs_lock);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return NULL;
|
||||
mark_extent_buffer_accessed(eb, NULL);
|
||||
return eb;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
|
Loading…
Reference in New Issue
Block a user