diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8c57af3702fa..d3f2623a2af0 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1343,10 +1343,10 @@ static int prepare_uptodate_page(struct inode *inode, /* * Since btrfs_readpage() will unlock the page before it - * returns, there is a window where btrfs_releasepage() can - * be called to release the page. - * Here we check both inode mapping and PagePrivate() to - * make sure the page was not released. + * returns, there is a window where btrfs_releasepage() can be + * called to release the page. Here we check both inode + * mapping and PagePrivate() to make sure the page was not + * released. * * The private flag check is essential for subpage as we need * to store extra bitmap using page->private. diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cade9a2d561e..034fe81db5c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8426,11 +8426,47 @@ static void btrfs_readahead(struct readahead_control *rac) extent_readahead(rac); } +/* + * For releasepage() and invalidatepage() there is a race window where + * end_page_writeback() has been called but the subpage spinlock is not yet + * released. If we went on to release/invalidate the page anyway, we could + * cause a use-after-free of the subpage spinlock. So this function spins + * until whoever still holds the subpage spinlock has released it. + */ +static void wait_subpage_spinlock(struct page *page) +{ + struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_subpage *subpage; + + if (fs_info->sectorsize == PAGE_SIZE) + return; + + ASSERT(PagePrivate(page) && page->private); + subpage = (struct btrfs_subpage *)page->private; + + /* + * This may look insane as we just acquire the spinlock and release it, + * without doing anything. But we just want to make sure no one is + * still holding the subpage spinlock. 
+ * And since the page is neither dirty nor under writeback, and we hold + * the page lock, the only possible holder of the spinlock is the endio + * function clearing page writeback. + * + * Acquiring the spinlock here makes sure every such holder has exited, + * so we're safe to release/invalidate the page. + */ + spin_lock_irq(&subpage->lock); + spin_unlock_irq(&subpage->lock); +} + static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { int ret = try_release_extent_mapping(page, gfp_flags); - if (ret == 1) + + if (ret == 1) { + wait_subpage_spinlock(page); clear_page_extent_mapped(page); + } return ret; } @@ -8494,6 +8530,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, * do double ordered extent accounting on the same page. */ wait_on_page_writeback(page); + wait_subpage_spinlock(page); /* * For subpage case, we have call sites like diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 3c311841cdee..cb10e56ee31e 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -435,8 +435,10 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&subpage->lock, flags); subpage->writeback_bitmap &= ~tmp; - if (subpage->writeback_bitmap == 0) + if (subpage->writeback_bitmap == 0) { + ASSERT(PageWriteback(page)); end_page_writeback(page); + } spin_unlock_irqrestore(&subpage->lock, flags); }