mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 04:02:20 +00:00
btrfs: only unlock the to-be-submitted ranges inside a folio
[SUBPAGE COMPRESSION LIMITS] Currently inside writepage_delalloc(), if a delalloc range is going to be submitted asynchronously (inline or compression, the page dirty/writeback/unlock are all handled in at different time, not at the submission time), then we return 1 and extent_writepage() will skip the submission. This is fine if every sector matches page size, but if a sector is smaller than page size (aka, subpage case), then it can be very problematic, for example for the following 64K page: 0 16K 32K 48K 64K |/| |///////| |/| | | 4K 52K Where |/| is the dirty range we need to submit. In the above case, we need the following different handling for the 3 ranges: - [0, 4K) needs to be submitted for regular write A single sector cannot be compressed. - [16K, 32K) needs to be submitted for compressed write - [48K, 52K) needs to be submitted for regular write. Above, if we try to submit [16K, 32K) for compressed write, we will return 1 and immediately, and without submitting the remaining [48K, 52K) range. Furthermore, since extent_writepage() will exit without unlocking any sectors, the submitted range [0, 4K) will not have sector unlocked. That's the reason why for now subpage is only allowed for full page range. [ENHANCEMENT] - Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap This records which sectors will be submitted by extent_writepage_io(). This allows us to track which sectors needs to be submitted thus later to be properly unlocked. For asynchronously submitted range (inline/compression), the corresponding bits will be cleared from that bitmap. - Only return 1 if no sector needs to be submitted in writepage_delalloc() - Only submit sectors marked by submission bitmap inside extent_writepage_io() So we won't touch the asynchronously submitted part. - Introduce btrfs_folio_end_writer_lock_bitmap() helper This will only unlock the involved sectors specified by @bitmap parameter, to avoid touching the range asynchronously submitted. Please note that, since subpage compression is still limited to page aligned range, this change is only a preparation for future sector perfect compression support for subpage. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
49a9907368
commit
bd610c0937
@ -101,6 +101,13 @@ struct btrfs_bio_ctrl {
|
||||
blk_opf_t opf;
|
||||
btrfs_bio_end_io_t end_io_func;
|
||||
struct writeback_control *wbc;
|
||||
|
||||
/*
|
||||
* The sectors of the page which are going to be submitted by
|
||||
* extent_writepage_io().
|
||||
* This is to avoid touching ranges covered by compression/inline.
|
||||
*/
|
||||
unsigned long submit_bitmap;
|
||||
};
|
||||
|
||||
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
|
||||
@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
|
||||
*/
|
||||
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
struct folio *folio,
|
||||
struct writeback_control *wbc)
|
||||
struct btrfs_bio_ctrl *bio_ctrl)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
|
||||
struct writeback_control *wbc = bio_ctrl->wbc;
|
||||
const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
|
||||
const u64 page_start = folio_pos(folio);
|
||||
const u64 page_end = page_start + folio_size(folio) - 1;
|
||||
@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
u64 delalloc_to_write = 0;
|
||||
int ret = 0;
|
||||
|
||||
/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
|
||||
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
|
||||
ASSERT(fs_info->sectors_per_page > 1);
|
||||
btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
|
||||
} else {
|
||||
bio_ctrl->submit_bitmap = 1;
|
||||
}
|
||||
|
||||
/* Lock all (subpage) delalloc ranges inside the folio first. */
|
||||
while (delalloc_start < page_end) {
|
||||
delalloc_end = page_end;
|
||||
@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
}
|
||||
|
||||
/*
|
||||
* We can hit btrfs_run_delalloc_range() with >0 return value.
|
||||
*
|
||||
* This happens when either the IO is already done and folio
|
||||
* unlocked (inline) or the IO submission and folio unlock would
|
||||
* be handled as async (compression).
|
||||
*
|
||||
* Inline is only possible for regular sectorsize for now.
|
||||
*
|
||||
* Compression is possible for both subpage and regular cases,
|
||||
* but even for subpage compression only happens for page aligned
|
||||
* range, thus the found delalloc range must go beyond current
|
||||
* folio.
|
||||
* We have some ranges that's going to be submitted asynchronously
|
||||
* (compression or inline). These range have their own control
|
||||
* on when to unlock the pages. We should not touch them
|
||||
* anymore, so clear the range from the submission bitmap.
|
||||
*/
|
||||
if (ret > 0)
|
||||
ASSERT(!is_subpage || found_start + found_len >= page_end);
|
||||
|
||||
if (ret > 0) {
|
||||
unsigned int start_bit = (found_start - page_start) >>
|
||||
fs_info->sectorsize_bits;
|
||||
unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
|
||||
page_start) >> fs_info->sectorsize_bits;
|
||||
bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
|
||||
}
|
||||
/*
|
||||
* Above btrfs_run_delalloc_range() may have unlocked the folio,
|
||||
* thus for the last range, we cannot touch the folio anymore.
|
||||
@ -1230,10 +1242,10 @@ out:
|
||||
DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* If btrfs_run_dealloc_range() already started I/O and unlocked
|
||||
* the folios, we just need to account for them here.
|
||||
* If all ranges are submitted asynchronously, we just need to account
|
||||
* for them here.
|
||||
*/
|
||||
if (ret == 1) {
|
||||
if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
|
||||
wbc->nr_to_write -= delalloc_to_write;
|
||||
return 1;
|
||||
}
|
||||
@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
unsigned long range_bitmap = 0;
|
||||
/*
|
||||
* This is the default value for sectorsize == PAGE_SIZE case.
|
||||
* We known we need to write the dirty sector (aka the page),
|
||||
* even if the page is not dirty (we cleared it before entering).
|
||||
*
|
||||
* For subpage cases we will get the correct bitmap later.
|
||||
*/
|
||||
unsigned long dirty_bitmap = 1;
|
||||
unsigned int bitmap_size = 1;
|
||||
bool submitted_io = false;
|
||||
const u64 folio_start = folio_pos(folio);
|
||||
u64 cur;
|
||||
@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
|
||||
ASSERT(fs_info->sectors_per_page > 1);
|
||||
btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap);
|
||||
bitmap_size = fs_info->sectors_per_page;
|
||||
}
|
||||
for (cur = start; cur < start + len; cur += fs_info->sectorsize)
|
||||
set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
|
||||
bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size);
|
||||
bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
|
||||
fs_info->sectors_per_page);
|
||||
|
||||
bio_ctrl->end_io_func = end_bbio_data_write;
|
||||
|
||||
for_each_set_bit(bit, &dirty_bitmap, bitmap_size) {
|
||||
for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
|
||||
cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
|
||||
|
||||
if (cur >= i_size) {
|
||||
@ -1421,6 +1420,7 @@ out:
|
||||
static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
|
||||
{
|
||||
struct inode *inode = folio->mapping->host;
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
const u64 page_start = folio_pos(folio);
|
||||
int ret;
|
||||
size_t pg_offset;
|
||||
@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
|
||||
if (folio->index == end_index)
|
||||
folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
|
||||
|
||||
/*
|
||||
* Default to unlock the whole folio.
|
||||
* The proper bitmap can only be initialized until writepage_delalloc().
|
||||
*/
|
||||
bio_ctrl->submit_bitmap = (unsigned long)-1;
|
||||
ret = set_folio_extent_mapped(folio);
|
||||
if (ret < 0)
|
||||
goto done;
|
||||
|
||||
ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc);
|
||||
ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
|
||||
if (ret == 1)
|
||||
return 0;
|
||||
if (ret)
|
||||
@ -1466,8 +1471,11 @@ done:
|
||||
mapping_set_error(folio->mapping, ret);
|
||||
}
|
||||
|
||||
btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio,
|
||||
page_start, PAGE_SIZE);
|
||||
/*
|
||||
* Only unlock ranges that are submitted. As there can be some async
|
||||
* submitted ranges inside the folio.
|
||||
*/
|
||||
btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
|
||||
ASSERT(ret <= 0);
|
||||
return ret;
|
||||
}
|
||||
@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
|
||||
if (pages_dirty && folio != locked_folio)
|
||||
ASSERT(folio_test_dirty(folio));
|
||||
|
||||
/*
|
||||
* Set the submission bitmap to submit all sectors.
|
||||
* extent_writepage_io() will do the truncation correctly.
|
||||
*/
|
||||
bio_ctrl.submit_bitmap = (unsigned long)-1;
|
||||
ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
|
||||
&bio_ctrl, i_size);
|
||||
if (ret == 1)
|
||||
|
@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
folio_unlock(folio);
|
||||
}
|
||||
|
||||
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
|
||||
unsigned long flags;
|
||||
bool last = false;
|
||||
int cleared = 0;
|
||||
int bit;
|
||||
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
|
||||
if (atomic_read(&subpage->writers) == 0) {
|
||||
/* No writers, locked by plain lock_page(). */
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
|
||||
if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
|
||||
cleared++;
|
||||
}
|
||||
ASSERT(atomic_read(&subpage->writers) >= cleared);
|
||||
last = atomic_sub_and_test(cleared, &subpage->writers);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
if (last)
|
||||
folio_unlock(folio);
|
||||
}
|
||||
|
||||
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
|
||||
bitmap_test_range_all_set(subpage->bitmaps, \
|
||||
fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
|
||||
|
@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap);
|
||||
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 search_start,
|
||||
u64 *found_start_ret, u32 *found_len_ret);
|
||||
|
Loading…
Reference in New Issue
Block a user