forked from Minki/linux
btrfs: refactor btrfs_invalidatepage() for subpage support
This patch will refactor btrfs_invalidatepage() for the incoming subpage support. The involved modifications are: - Use a while() loop instead of "goto again;". - Use a single variable to determine whether to delete extent states. Each branch will also have comments explaining why we can or cannot delete the extent states. - Do qgroup free and extent states deletion per-loop. Current code can only work for the PAGE_SIZE == sectorsize case. This refactor also makes it clear what we do for different sectors: - Sectors without ordered extent: We're completely safe to remove all extent states for the sector(s). - Sectors with ordered extent, but no Private2 bit: This means the endio has already been executed, so we can't remove all extent states for the sector(s). - Sectors with ordered extent that still have the Private2 bit: This means we need to decrease the ordered extent accounting. And then it comes to two different variants: * We have finished and removed the ordered extent: Then it's the same as "sectors without ordered extent". * We didn't finish the ordered extent: We can remove some extent states, but not all. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
c095f3333f
commit
3b8358407a
175
fs/btrfs/inode.c
175
fs/btrfs/inode.c
@ -8318,15 +8318,11 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
|
|||||||
{
|
{
|
||||||
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
|
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
|
||||||
struct extent_io_tree *tree = &inode->io_tree;
|
struct extent_io_tree *tree = &inode->io_tree;
|
||||||
struct btrfs_ordered_extent *ordered;
|
|
||||||
struct extent_state *cached_state = NULL;
|
struct extent_state *cached_state = NULL;
|
||||||
u64 page_start = page_offset(page);
|
u64 page_start = page_offset(page);
|
||||||
u64 page_end = page_start + PAGE_SIZE - 1;
|
u64 page_end = page_start + PAGE_SIZE - 1;
|
||||||
u64 start;
|
u64 cur;
|
||||||
u64 end;
|
|
||||||
int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
|
int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
|
||||||
bool found_ordered = false;
|
|
||||||
bool completed_ordered = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have page locked so no new ordered extent can be created on this
|
* We have page locked so no new ordered extent can be created on this
|
||||||
@ -8350,93 +8346,120 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
|
|||||||
if (!inode_evicting)
|
if (!inode_evicting)
|
||||||
lock_extent_bits(tree, page_start, page_end, &cached_state);
|
lock_extent_bits(tree, page_start, page_end, &cached_state);
|
||||||
|
|
||||||
start = page_start;
|
cur = page_start;
|
||||||
again:
|
while (cur < page_end) {
|
||||||
ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
|
struct btrfs_ordered_extent *ordered;
|
||||||
if (ordered) {
|
bool delete_states;
|
||||||
found_ordered = true;
|
u64 range_end;
|
||||||
end = min(page_end,
|
|
||||||
ordered->file_offset + ordered->num_bytes - 1);
|
ordered = btrfs_lookup_first_ordered_range(inode, cur,
|
||||||
|
page_end + 1 - cur);
|
||||||
|
if (!ordered) {
|
||||||
|
range_end = page_end;
|
||||||
|
/*
|
||||||
|
* No ordered extent covering this range, we are safe
|
||||||
|
* to delete all extent states in the range.
|
||||||
|
*/
|
||||||
|
delete_states = true;
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
if (ordered->file_offset > cur) {
|
||||||
|
/*
|
||||||
|
* There is a range between [cur, oe->file_offset) not
|
||||||
|
* covered by any ordered extent.
|
||||||
|
* We are safe to delete all extent states, and handle
|
||||||
|
* the ordered extent in the next iteration.
|
||||||
|
*/
|
||||||
|
range_end = ordered->file_offset - 1;
|
||||||
|
delete_states = true;
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
|
||||||
|
range_end = min(ordered->file_offset + ordered->num_bytes - 1,
|
||||||
|
page_end);
|
||||||
|
if (!PagePrivate2(page)) {
|
||||||
|
/*
|
||||||
|
* If Private2 is cleared, it means endio has already
|
||||||
|
* been executed for the range.
|
||||||
|
* We can't delete the extent states as
|
||||||
|
* btrfs_finish_ordered_io() may still use some of them.
|
||||||
|
*/
|
||||||
|
delete_states = false;
|
||||||
|
goto next;
|
||||||
|
}
|
||||||
|
ClearPagePrivate2(page);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* IO on this page will never be started, so we need to account
|
* IO on this page will never be started, so we need to account
|
||||||
* for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
|
* for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
|
||||||
* here, must leave that up for the ordered extent completion.
|
* here, must leave that up for the ordered extent completion.
|
||||||
|
*
|
||||||
|
* This will also unlock the range for incoming
|
||||||
|
* btrfs_finish_ordered_io().
|
||||||
*/
|
*/
|
||||||
if (!inode_evicting)
|
if (!inode_evicting)
|
||||||
clear_extent_bit(tree, start, end,
|
clear_extent_bit(tree, cur, range_end,
|
||||||
EXTENT_DELALLOC |
|
EXTENT_DELALLOC |
|
||||||
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
|
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
|
||||||
EXTENT_DEFRAG, 1, 0, &cached_state);
|
EXTENT_DEFRAG, 1, 0, &cached_state);
|
||||||
|
|
||||||
|
spin_lock_irq(&inode->ordered_tree.lock);
|
||||||
|
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
|
||||||
|
ordered->truncated_len = min(ordered->truncated_len,
|
||||||
|
cur - ordered->file_offset);
|
||||||
|
spin_unlock_irq(&inode->ordered_tree.lock);
|
||||||
|
|
||||||
|
if (btrfs_dec_test_ordered_pending(inode, &ordered,
|
||||||
|
cur, range_end + 1 - cur, 1)) {
|
||||||
|
btrfs_finish_ordered_io(ordered);
|
||||||
|
/*
|
||||||
|
* The ordered extent has finished, now we're again
|
||||||
|
* safe to delete all extent states of the range.
|
||||||
|
*/
|
||||||
|
delete_states = true;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* btrfs_finish_ordered_io() will get executed by endio
|
||||||
|
* of other pages, thus we can't delete extent states
|
||||||
|
* anymore
|
||||||
|
*/
|
||||||
|
delete_states = false;
|
||||||
|
}
|
||||||
|
next:
|
||||||
|
if (ordered)
|
||||||
|
btrfs_put_ordered_extent(ordered);
|
||||||
/*
|
/*
|
||||||
* A page with Private2 bit means no bio has been submitted
|
* Qgroup reserved space handler
|
||||||
* covering the page, thus we have to manually do the ordered
|
* Sector(s) here will be either:
|
||||||
* extent accounting.
|
|
||||||
*
|
*
|
||||||
* For page without Private2, the ordered extent accounting is
|
* 1) Already written to disk or bio already finished
|
||||||
* done in its endio function of the submitted bio.
|
* Then its QGROUP_RESERVED bit in io_tree is already cleared.
|
||||||
|
* Qgroup will be handled by its qgroup_record then.
|
||||||
|
* btrfs_qgroup_free_data() call will do nothing here.
|
||||||
|
*
|
||||||
|
* 2) Not written to disk yet
|
||||||
|
* Then btrfs_qgroup_free_data() call will clear the
|
||||||
|
* QGROUP_RESERVED bit of its io_tree, and free the qgroup
|
||||||
|
* reserved data space.
|
||||||
|
* Since the IO will never happen for this page.
|
||||||
*/
|
*/
|
||||||
if (TestClearPagePrivate2(page)) {
|
btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
|
||||||
spin_lock_irq(&inode->ordered_tree.lock);
|
|
||||||
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
|
|
||||||
ordered->truncated_len = min(ordered->truncated_len,
|
|
||||||
start - ordered->file_offset);
|
|
||||||
spin_unlock_irq(&inode->ordered_tree.lock);
|
|
||||||
|
|
||||||
if (btrfs_dec_test_ordered_pending(inode, &ordered,
|
|
||||||
start,
|
|
||||||
end - start + 1, 1)) {
|
|
||||||
btrfs_finish_ordered_io(ordered);
|
|
||||||
completed_ordered = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
btrfs_put_ordered_extent(ordered);
|
|
||||||
if (!inode_evicting) {
|
if (!inode_evicting) {
|
||||||
cached_state = NULL;
|
clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
|
||||||
lock_extent_bits(tree, start, end,
|
|
||||||
&cached_state);
|
|
||||||
}
|
|
||||||
|
|
||||||
start = end + 1;
|
|
||||||
if (start < page_end)
|
|
||||||
goto again;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Qgroup reserved space handler
|
|
||||||
* Page here will be either
|
|
||||||
* 1) Already written to disk or ordered extent already submitted
|
|
||||||
* Then its QGROUP_RESERVED bit in io_tree is already cleaned.
|
|
||||||
* Qgroup will be handled by its qgroup_record then.
|
|
||||||
* btrfs_qgroup_free_data() call will do nothing here.
|
|
||||||
*
|
|
||||||
* 2) Not written to disk yet
|
|
||||||
* Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
|
|
||||||
* bit of its io_tree, and free the qgroup reserved data space.
|
|
||||||
* Since the IO will never happen for this page.
|
|
||||||
*/
|
|
||||||
btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
|
|
||||||
if (!inode_evicting) {
|
|
||||||
bool delete = true;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* If there's an ordered extent for this range and we have not
|
|
||||||
* finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
|
|
||||||
* in the range for the ordered extent completion. We must also
|
|
||||||
* not delete the range, otherwise we would lose that bit (and
|
|
||||||
* any other bits set in the range). Make sure EXTENT_UPTODATE
|
|
||||||
* is cleared if we don't delete, otherwise it can lead to
|
|
||||||
* corruptions if the i_size is extented later.
|
|
||||||
*/
|
|
||||||
if (found_ordered && !completed_ordered)
|
|
||||||
delete = false;
|
|
||||||
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
|
|
||||||
EXTENT_DELALLOC | EXTENT_UPTODATE |
|
EXTENT_DELALLOC | EXTENT_UPTODATE |
|
||||||
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
|
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
|
||||||
delete, &cached_state);
|
delete_states, &cached_state);
|
||||||
|
}
|
||||||
__btrfs_releasepage(page, GFP_NOFS);
|
cur = range_end + 1;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* We have iterated through all ordered extents of the page, the page
|
||||||
|
* should not have Private2 anymore, or the above iteration does
|
||||||
|
* something wrong.
|
||||||
|
*/
|
||||||
|
ASSERT(!PagePrivate2(page));
|
||||||
|
if (!inode_evicting)
|
||||||
|
__btrfs_releasepage(page, GFP_NOFS);
|
||||||
ClearPageChecked(page);
|
ClearPageChecked(page);
|
||||||
clear_page_extent_mapped(page);
|
clear_page_extent_mapped(page);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user