forked from Minki/linux
cc1d0d93d5
[BUG] When running fsstress with subpage RW support, there are random BUG_ON()s triggered with the following trace: kernel BUG at fs/btrfs/file-item.c:667! Internal error: Oops - BUG: 0 [#1] SMP CPU: 1 PID: 3486 Comm: kworker/u13:2 5.11.0-rc4-custom+ #43 Hardware name: Radxa ROCK Pi 4B (DT) Workqueue: btrfs-worker-high btrfs_work_helper [btrfs] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--) pc : btrfs_csum_one_bio+0x420/0x4e0 [btrfs] lr : btrfs_csum_one_bio+0x400/0x4e0 [btrfs] Call trace: btrfs_csum_one_bio+0x420/0x4e0 [btrfs] btrfs_submit_bio_start+0x20/0x30 [btrfs] run_one_async_start+0x28/0x44 [btrfs] btrfs_work_helper+0x128/0x1b4 [btrfs] process_one_work+0x22c/0x430 worker_thread+0x70/0x3a0 kthread+0x13c/0x140 ret_from_fork+0x10/0x30 [CAUSE] The above BUG_ON() means there is some bio range which doesn't have an ordered extent, which is indeed worth a BUG_ON(). Unlike the regular sectorsize == PAGE_SIZE case, in subpage we have an extra subpage dirty bitmap to record which range is dirty and should be written back. This means that if we submit a bio for a subpage range, we need to clear not only the page dirty flag but also the subpage dirty bits. In __extent_writepage_io(), we call btrfs_page_clear_dirty() for any range we submit a bio for. But there is a loophole: if we hit a range which is beyond i_size, we just call btrfs_writepage_endio_finish_ordered() to finish the ordered io, then break out, without clearing the subpage dirty bits. This means that if we hit the above branch, the subpage dirty bits are still there; if another range of the page gets dirtied and we need to write back that page again, we will submit a bio for the old range, leaving a wild bio range which doesn't have an ordered extent. [FIX] Fix it by always calling btrfs_page_clear_dirty() in __extent_writepage_io(). Also, to prevent such a problem from happening again, add a new assert, btrfs_page_assert_not_dirty(), to make sure both page dirty and subpage dirty bits are cleared before exiting __extent_writepage_io(). 
Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
133 lines
4.6 KiB
C
133 lines
4.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef BTRFS_SUBPAGE_H
|
|
#define BTRFS_SUBPAGE_H
|
|
|
|
#include <linux/spinlock.h>
|
|
|
|
/*
 * Maximum page size we support is 64K and minimum sector size is 4K, so a
 * page holds at most 64K / 4K = 16 sectors and a u16 bitmap is sufficient.
 * Regular bitmap_* is not used due to size reasons.
 */
|
|
#define BTRFS_SUBPAGE_BITMAP_SIZE 16
|
|
|
|
/*
 * Structure to track the status of each sector inside a page, attached to
 * page::private for both data and metadata inodes.
 */
|
|
struct btrfs_subpage {
|
|
/* Common members for both data and metadata pages */
|
|
/* NOTE(review): presumably serializes updates to the bitmaps below - confirm in subpage.c */
spinlock_t lock;
|
|
/*
 * Sector state bitmaps, one u16 per tracked state.
 * NOTE(review): presumably bit N describes sector N of the page - confirm
 * against the helpers in subpage.c.
 */
u16 uptodate_bitmap;
|
|
u16 error_bitmap;
|
|
u16 dirty_bitmap;
|
|
u16 writeback_bitmap;
|
|
/*
 * Both data and metadata need to track how many readers there are for
 * the page.
 * Data relies on @readers to unlock the page when the last reader has
 * finished.
 * While metadata doesn't need page unlock, it needs to prevent
 * page::private from getting cleared before the last end_page_read().
 */
|
|
atomic_t readers;
|
|
union {
|
|
/*
 * Members only used by metadata
 *
 * @eb_refs should only be operated on under private_lock, as it
 * manages whether the subpage can be detached.
 */
|
|
atomic_t eb_refs;
|
|
/* Members only used by data */
|
|
struct {
|
|
atomic_t writers;
|
|

|
/* Track pending ordered extents for the sectors of this page */
|
|
u16 ordered_bitmap;
|
|
};
|
|
};
|
|
};
|
|
|
|
/*
 * Kind of page a struct btrfs_subpage is set up for. Passed as @type to
 * btrfs_attach_subpage() and btrfs_alloc_subpage().
 */
enum btrfs_subpage_type {
|
|
BTRFS_SUBPAGE_METADATA,
|
|
BTRFS_SUBPAGE_DATA,
|
|
};
|
|
|
|
int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, enum btrfs_subpage_type type);
|
|
void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
|
|
struct page *page);
|
|
|
|
/* Allocate additional data where page represents more than one sector */
|
|
int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
|
struct btrfs_subpage **ret,
|
|
enum btrfs_subpage_type type);
|
|
void btrfs_free_subpage(struct btrfs_subpage *subpage);
|
|
|
|
void btrfs_page_inc_eb_refs(const struct btrfs_fs_info *fs_info,
|
|
struct page *page);
|
|
void btrfs_page_dec_eb_refs(const struct btrfs_fs_info *fs_info,
|
|
struct page *page);
|
|
|
|
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
|
|
void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
|
|
/*
 * Template for subpage related operations.
 *
 * For each @name this macro declares nine prototypes: a set, a clear and a
 * test helper in each of the three flavours described below. All of them
 * take the same (fs_info, page, start, len) arguments.
 *
 * btrfs_subpage_*() are for call sites where the page has subpage attached and
 * the range is ensured to be inside the page.
 *
 * btrfs_page_*() are for call sites where the page can either be subpage
 * specific or a regular page. The function will handle both cases.
 * But the range still needs to be inside the page.
 *
 * btrfs_page_clamp_*() are similar to btrfs_page_*(), except the range doesn't
 * need to be inside the page. Those functions will truncate the range
 * automatically.
 */
|
|
#define DECLARE_BTRFS_SUBPAGE_OPS(name) \
|
|
void btrfs_subpage_set_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
void btrfs_subpage_clear_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len); \
|
|
bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
|
|
struct page *page, u64 start, u32 len);
|
|
|
|
/*
 * Instantiate the prototype template once per state name. Each name matches
 * a <name>_bitmap member of struct btrfs_subpage.
 */
DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
|
|
DECLARE_BTRFS_SUBPAGE_OPS(error);
|
|
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
|
|
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
|
|
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
|
|
|
|
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
|
|
struct page *page, u64 start, u32 len);
|
|
|
|
/*
 * Assert that both the page dirty flag and the subpage dirty bits of @page
 * are cleared. Meant to be called before exiting __extent_writepage_io(), so
 * that a stale subpage dirty bit cannot lead to a later bio for a range
 * without an ordered extent.
 */
void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
|
struct page *page);
|
|
|
|
#endif
|