for-5.11-rc2-tag

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl/0cI8ACgkQxWXV+ddt
WDspQw/8DcC8zhGgunk0m2kcXd6dFOGbsr3hNGCsgUSKESRw6AgTZ0rJf/QLjayF
/vaJWzQW9ijfZ92fWZS+mrmskk0N8RFOsEvkCRLesgRaasbrkchLBo5HGQasOBEV
LXyU878GrBkNaHzClJz+JdU26i0d17BFdddgtZVQ1St9Wd9ecc7Q6iqG80RWFeE7
uVbhv+QjocM3EieOnwIy5Mz6jZgJLYwqw7/y2njKduBeJtbt1K1j/y7IJk0WFMUM
8eUpDL6vlAHB8FjV2wWOzO46bbEaUpaBADM6yabrq0lnM0kr7Rb+WV/WSLM/AZ3g
Hzs4qROOEP+zjfZ5nYjJQDJRMpSipZomsUY5uMZnhRxlZuHPaoBotRRzs5AIZYj2
BnkfucOcjxS/JTBD//ltJXE8RxbMIyMBBBipbBwqmxOkR9gM9BPuJ6iJPfUX//gG
1GHJ+FPns8ua3JW21ih6H31xNEPS36tsywvE8yCEtEWMxCFCBwgGu+4D8KpGBjtY
ySFxkxxAbTuFi9fqSE/mBC+6lpbVTO0OvizuoEQh8C2izkXRbDsDVgPN8d7rCW7h
Cdox4DUp61sNf+G3ll9Dv9ceAXroZTVRTHGjlav6NAFpydz3yPo5x54Ex7S+k3oN
BAcZEl1Tl3hz4WxF8Ywc+yJ8n8l9AVa3KcYRXVbyVjTGg+JjU94=
=jlQf
-----END PGP SIGNATURE-----

Merge tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes that arrived before the end of the year:

   - a bunch of fixes related to transaction handle lifetime wrt various
     operations (umount, remount, qgroup scan, orphan cleanup)

   - async discard scheduling fixes

   - fix item size calculation when item keys collide for extended refs
     (hardlinks)

   - fix qgroup flushing from running transaction

   - fix send, wrong file path when there is an inode with a pending rmdir

   - fix deadlock when cloning inline extent and low on free metadata space"

* tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: run delayed iputs when remounting RO to avoid leaking them
  btrfs: add assertion for empty list of transactions at late stage of umount
  btrfs: fix race between RO remount and the cleaner task
  btrfs: fix transaction leak and crash after cleaning up orphans on RO mount
  btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan
  btrfs: merge critical sections of discard lock in workfn
  btrfs: fix racy access to discard_ctl data
  btrfs: fix async discard stall
  btrfs: tests: initialize test inodes location
  btrfs: send: fix wrong file path when there is an inode with a pending rmdir
  btrfs: qgroup: don't try to wait flushing if we're already holding a transaction
  btrfs: correctly calculate item size used when item key collision happens
  btrfs: fix deadlock when cloning inline extent and low on free metadata space
commit 71c061d244
@@ -42,6 +42,15 @@ enum {
          * to an inode.
          */
         BTRFS_INODE_NO_XATTRS,
+        /*
+         * Set when we are in a context where we need to start a transaction and
+         * have dirty pages with the respective file range locked. This is to
+         * ensure that when reserving space for the transaction, if we are low
+         * on available space and need to flush delalloc, we will not flush
+         * delalloc for this inode, because that could result in a deadlock (on
+         * the file range, inode's io_tree).
+         */
+        BTRFS_INODE_NO_DELALLOC_FLUSH,
 };

 /* in memory btrfs inode */
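The flag added above is one bit in the inode's runtime_flags word, set and tested with atomic bit operations so readers need no lock. A minimal userspace sketch of that mechanism, not btrfs code; the model_* names and the bit index are invented for illustration:

    #include <stdatomic.h>
    #include <stdio.h>

    /* hypothetical bit index; only the set/test semantics mirror the patch */
    enum { MODEL_INODE_NO_DELALLOC_FLUSH = 11 };

    struct model_inode {
            _Atomic unsigned long runtime_flags;
    };

    static void model_set_bit(int nr, _Atomic unsigned long *addr)
    {
            atomic_fetch_or(addr, 1UL << nr);      /* like set_bit() */
    }

    static int model_test_bit(int nr, _Atomic unsigned long *addr)
    {
            return (atomic_load(addr) >> nr) & 1;  /* like test_bit() */
    }

    int main(void)
    {
            struct model_inode inode = { 0 };

            model_set_bit(MODEL_INODE_NO_DELALLOC_FLUSH, &inode.runtime_flags);
            if (model_test_bit(MODEL_INODE_NO_DELALLOC_FLUSH, &inode.runtime_flags))
                    printf("inode excluded from delalloc flushing\n");
            return 0;
    }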
@@ -2555,8 +2555,14 @@ out:
  * @p:       Holds all btree nodes along the search path
  * @root:    The root node of the tree
  * @key:     The key we are looking for
- * @ins_len: Indicates purpose of search, for inserts it is 1, for
- *           deletions it's -1. 0 for plain searches
+ * @ins_len: Indicates purpose of search:
+ *           >0 for inserts it's size of item inserted (*)
+ *           <0 for deletions
+ *            0 for plain searches, not modifying the tree
+ *
+ *           (*) If size of item inserted doesn't include
+ *           sizeof(struct btrfs_item), then p->search_for_extension must
+ *           be set.
  * @cow:     boolean should CoW operations be performed. Must always be 1
  *           when modifying the tree.
  *
@@ -2717,6 +2723,20 @@ cow_done:

                 if (level == 0) {
                         p->slots[level] = slot;
+                        /*
+                         * Item key already exists. In this case, if we are
+                         * allowed to insert the item (for example, in dir_item
+                         * case, item key collision is allowed), it will be
+                         * merged with the original item. Only the item size
+                         * grows, no new btrfs item will be added. If
+                         * search_for_extension is not set, ins_len already
+                         * accounts the size btrfs_item, deduct it here so leaf
+                         * space check will be correct.
+                         */
+                        if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
+                                ASSERT(ins_len >= sizeof(struct btrfs_item));
+                                ins_len -= sizeof(struct btrfs_item);
+                        }
                         if (ins_len > 0 &&
                             btrfs_leaf_free_space(b) < ins_len) {
                                 if (write_lock_level < 1) {
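The accounting rule the second hunk enforces can be shown in isolation: when the key already exists and the item is merely extended, only the data grows, so the item header must not be counted against leaf free space. A hedged userspace sketch, not btrfs code; leaf_space_needed is an invented helper and 25 is what sizeof(struct btrfs_item) comes to on disk:

    #include <assert.h>
    #include <stdio.h>

    #define MODEL_ITEM_HEADER_SIZE 25  /* sizeof(struct btrfs_item) in btrfs */

    static int leaf_space_needed(int data_size, int key_exists,
                                 int search_for_extension)
    {
            /* unless search_for_extension is set, ins_len includes the header */
            int ins_len = data_size +
                          (search_for_extension ? 0 : MODEL_ITEM_HEADER_SIZE);

            if (key_exists && !search_for_extension) {
                    /* merged into the existing item: no new header is added */
                    assert(ins_len >= MODEL_ITEM_HEADER_SIZE);
                    ins_len -= MODEL_ITEM_HEADER_SIZE;
            }
            return ins_len;
    }

    int main(void)
    {
            printf("new item:        %d bytes\n", leaf_space_needed(100, 0, 0)); /* 125 */
            printf("extend existing: %d bytes\n", leaf_space_needed(100, 1, 0)); /* 100 */
            return 0;
    }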
@@ -131,6 +131,8 @@ enum {
          * defrag
          */
         BTRFS_FS_STATE_REMOUNTING,
+        /* Filesystem in RO mode */
+        BTRFS_FS_STATE_RO,
         /* Track if a transaction abort has been reported on this filesystem */
         BTRFS_FS_STATE_TRANS_ABORTED,
         /*
@@ -367,6 +369,12 @@ struct btrfs_path {
         unsigned int search_commit_root:1;
         unsigned int need_commit_sem:1;
         unsigned int skip_release_on_error:1;
+        /*
+         * Indicate that new item (btrfs_search_slot) is extending already
+         * existing item and ins_len contains only the data size and not item
+         * header (ie. sizeof(struct btrfs_item) is not included).
+         */
+        unsigned int search_for_extension:1;
 };
 #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
                                         sizeof(struct btrfs_item))
@@ -2885,10 +2893,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
  * If we remount the fs to be R/O or umount the fs, the cleaner needn't do
  * anything except sleeping. This function is used to check the status of
  * the fs.
+ * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
+ * since setting and checking for SB_RDONLY in the superblock's flags is not
+ * atomic.
  */
 static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
 {
-        return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
+        return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
+               btrfs_fs_closing(fs_info);
+}
+
+static inline void btrfs_set_sb_rdonly(struct super_block *sb)
+{
+        sb->s_flags |= SB_RDONLY;
+        set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
+}
+
+static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
+{
+        sb->s_flags &= ~SB_RDONLY;
+        clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
 }

 /* tree mod log functions from ctree.c */
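Why the helpers pair the two stores: sb->s_flags is a plain word read and written non-atomically, so testing SB_RDONLY from the cleaner races with a concurrent remount, while a mirrored atomic bit gives a well-defined lockless read. A userspace sketch of the idea, not kernel code, with invented names:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define MODEL_SB_RDONLY (1UL << 0)

    struct model_sb {
            unsigned long s_flags;     /* non-atomic, like sb->s_flags */
            atomic_bool state_ro;      /* mirrors BTRFS_FS_STATE_RO */
    };

    static void model_set_sb_rdonly(struct model_sb *sb)
    {
            sb->s_flags |= MODEL_SB_RDONLY;    /* racy to test on its own */
            atomic_store(&sb->state_ro, true); /* safe to test concurrently */
    }

    static bool model_need_cleaner_sleep(struct model_sb *sb)
    {
            return atomic_load(&sb->state_ro); /* never reads s_flags */
    }

    int main(void)
    {
            struct model_sb sb = { 0, false };

            model_set_sb_rdonly(&sb);
            return model_need_cleaner_sleep(&sb) ? 0 : 1;
    }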
@@ -3073,7 +3097,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                                u32 min_type);

 int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+                               bool in_reclaim_context);
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                               unsigned int extra_bits,
                               struct extent_state **cached_state);
@@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
          * flush all outstanding I/O and inode extent mappings before the
          * copy operation is declared as being finished
          */
-        ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+        ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
         if (ret) {
                 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
                 return ret;
@@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group(
 static struct btrfs_block_group *peek_discard_list(
                                         struct btrfs_discard_ctl *discard_ctl,
                                         enum btrfs_discard_state *discard_state,
-                                        int *discard_index)
+                                        int *discard_index, u64 now)
 {
         struct btrfs_block_group *block_group;
-        const u64 now = ktime_get_ns();

         spin_lock(&discard_ctl->lock);
 again:
         block_group = find_next_block_group(discard_ctl, now);

-        if (block_group && now > block_group->discard_eligible_time) {
+        if (block_group && now >= block_group->discard_eligible_time) {
                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
                     block_group->used != 0) {
                         if (btrfs_is_block_group_data_only(block_group))
@@ -222,12 +221,11 @@ again:
                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
                 }
                 discard_ctl->block_group = block_group;
+        }
+        if (block_group) {
                 *discard_state = block_group->discard_state;
                 *discard_index = block_group->discard_index;
-        } else {
-                block_group = NULL;
         }
-
         spin_unlock(&discard_ctl->lock);

         return block_group;
@@ -330,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
                 btrfs_discard_schedule_work(discard_ctl, false);
 }

-/**
- * btrfs_discard_schedule_work - responsible for scheduling the discard work
- * @discard_ctl: discard control
- * @override: override the current timer
- *
- * Discards are issued by a delayed workqueue item. @override is used to
- * update the current delay as the baseline delay interval is reevaluated on
- * transaction commit. This is also maxed with any other rate limit.
- */
-void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
-                                 bool override)
+static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
+                                          u64 now, bool override)
 {
         struct btrfs_block_group *block_group;
-        const u64 now = ktime_get_ns();

-        spin_lock(&discard_ctl->lock);
-
         if (!btrfs_run_discard_work(discard_ctl))
-                goto out;
-
+                return;
         if (!override && delayed_work_pending(&discard_ctl->work))
-                goto out;
+                return;

         block_group = find_next_block_group(discard_ctl, now);
         if (block_group) {
@@ -393,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
                 mod_delayed_work(discard_ctl->discard_workers,
                                  &discard_ctl->work, nsecs_to_jiffies(delay));
         }
-out:
+}
+
+/*
+ * btrfs_discard_schedule_work - responsible for scheduling the discard work
+ * @discard_ctl: discard control
+ * @override: override the current timer
+ *
+ * Discards are issued by a delayed workqueue item. @override is used to
+ * update the current delay as the baseline delay interval is reevaluated on
+ * transaction commit. This is also maxed with any other rate limit.
+ */
+void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
+                                 bool override)
+{
+        const u64 now = ktime_get_ns();
+
+        spin_lock(&discard_ctl->lock);
+        __btrfs_discard_schedule_work(discard_ctl, now, override);
         spin_unlock(&discard_ctl->lock);
 }

@@ -438,13 +440,18 @@ static void btrfs_discard_workfn(struct work_struct *work)
         int discard_index = 0;
         u64 trimmed = 0;
         u64 minlen = 0;
+        u64 now = ktime_get_ns();

         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

         block_group = peek_discard_list(discard_ctl, &discard_state,
-                                        &discard_index);
+                                        &discard_index, now);
         if (!block_group || !btrfs_run_discard_work(discard_ctl))
                 return;
+        if (now < block_group->discard_eligible_time) {
+                btrfs_discard_schedule_work(discard_ctl, false);
+                return;
+        }

         /* Perform discarding */
         minlen = discard_minlen[discard_index];
@@ -474,13 +481,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
                 discard_ctl->discard_extent_bytes += trimmed;
         }

-        /*
-         * Updated without locks as this is inside the workfn and nothing else
-         * is reading the values
-         */
-        discard_ctl->prev_discard = trimmed;
-        discard_ctl->prev_discard_time = ktime_get_ns();
-
         /* Determine next steps for a block_group */
         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
@@ -496,11 +496,13 @@ static void btrfs_discard_workfn(struct work_struct *work)
                 }
         }

+        now = ktime_get_ns();
         spin_lock(&discard_ctl->lock);
+        discard_ctl->prev_discard = trimmed;
+        discard_ctl->prev_discard_time = now;
         discard_ctl->block_group = NULL;
+        __btrfs_discard_schedule_work(discard_ctl, now, false);
         spin_unlock(&discard_ctl->lock);

-        btrfs_discard_schedule_work(discard_ctl, false);
 }

 /**
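The net effect on the workfn is easiest to see as a locking pattern: take the timestamp once, then perform the stats update, the block_group reset and the rescheduling decision inside one critical section, with the inner helper required to be called under the lock. A pthreads sketch of that pattern, not kernel code; all names are invented:

    #include <pthread.h>
    #include <stdint.h>
    #include <time.h>

    struct model_ctl {
            pthread_mutex_t lock;
            uint64_t prev_discard;
            uint64_t prev_discard_time;
            void *block_group;
    };

    static uint64_t now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    /* caller must hold ctl->lock, like __btrfs_discard_schedule_work() */
    static void schedule_work_locked(struct model_ctl *ctl, uint64_t now)
    {
            (void)ctl; (void)now;  /* would pick next group, arm delayed work */
    }

    static void workfn_finish(struct model_ctl *ctl, uint64_t trimmed)
    {
            uint64_t now = now_ns();

            /* one critical section: no one can see half-updated state */
            pthread_mutex_lock(&ctl->lock);
            ctl->prev_discard = trimmed;
            ctl->prev_discard_time = now;
            ctl->block_group = NULL;
            schedule_work_locked(ctl, now);
            pthread_mutex_unlock(&ctl->lock);
    }

    int main(void)
    {
            struct model_ctl ctl = { PTHREAD_MUTEX_INITIALIZER, 0, 0, NULL };

            workfn_finish(&ctl, 4096);
            return 0;
    }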
@@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg)
                  */
                 btrfs_delete_unused_bgs(fs_info);
 sleep:
-                clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
+                clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
                 if (kthread_should_park())
                         kthread_parkme();
                 if (kthread_should_stop())
@@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
                 return -ENOMEM;
         btrfs_init_delayed_root(fs_info->delayed_root);

+        if (sb_rdonly(sb))
+                set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
+
         return btrfs_alloc_stripe_hash_table(fs_info);
 }

@@ -2969,6 +2972,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
                 }
         }

+        ret = btrfs_find_orphan_roots(fs_info);
 out:
         return ret;
 }
@@ -3383,10 +3387,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
                 }
         }

-        ret = btrfs_find_orphan_roots(fs_info);
-        if (ret)
-                goto fail_qgroup;
-
         fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
         if (IS_ERR(fs_info->fs_root)) {
                 err = PTR_ERR(fs_info->fs_root);
@@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
         btrfs_stop_all_workers(fs_info);

+        /* We shouldn't have any transaction open at this point */
+        ASSERT(list_empty(&fs_info->trans_list));
+
         clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
         free_root_pointers(fs_info, true);
         btrfs_free_fs_roots(fs_info);
@@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
         want = extent_ref_type(parent, owner);
         if (insert) {
                 extra_size = btrfs_extent_inline_ref_size(want);
+                path->search_for_extension = 1;
                 path->keep_locks = 1;
         } else
                 extra_size = -1;
@@ -996,6 +997,7 @@ again:
 out:
         if (insert) {
                 path->keep_locks = 0;
+                path->search_for_extension = 0;
                 btrfs_unlock_up_safe(path, 1);
         }
         return err;
@@ -1016,8 +1016,10 @@ again:
         }

         btrfs_release_path(path);
+        path->search_for_extension = 1;
         ret = btrfs_search_slot(trans, root, &file_key, path,
                                 csum_size, 1);
+        path->search_for_extension = 0;
         if (ret < 0)
                 goto out;

@@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
  * some fairly slow code that needs optimization. This walks the list
  * of all the inodes with pending delalloc and forces them to disk.
  */
-static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot)
+static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
+                                 bool in_reclaim_context)
 {
         struct btrfs_inode *binode;
         struct inode *inode;
@@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot

                 list_move_tail(&binode->delalloc_inodes,
                                &root->delalloc_inodes);
+
+                if (in_reclaim_context &&
+                    test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
+                        continue;
+
                 inode = igrab(&binode->vfs_inode);
                 if (!inode) {
                         cond_resched_lock(&root->delalloc_lock);
@@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
         if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                 return -EROFS;

-        return start_delalloc_inodes(root, &nr, true);
+        return start_delalloc_inodes(root, &nr, true, false);
 }

-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
+                               bool in_reclaim_context)
 {
         struct btrfs_root *root;
         struct list_head splice;
@@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
                                &fs_info->delalloc_roots);
                 spin_unlock(&fs_info->delalloc_root_lock);

-                ret = start_delalloc_inodes(root, &nr, false);
+                ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
                 btrfs_put_root(root);
                 if (ret < 0)
                         goto out;
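A minimal model of the new skip, not btrfs code (the list and flag are plain stand-ins): in a reclaim context, inodes carrying the no-flush flag are passed over rather than flushed, so the flusher can never block on a file range that the task which triggered reclaim already holds locked:

    #include <stdio.h>

    struct model_inode {
            int no_delalloc_flush;
            struct model_inode *next;
    };

    static int flush_list(struct model_inode *head, int in_reclaim_context)
    {
            int flushed = 0;

            for (struct model_inode *i = head; i; i = i->next) {
                    if (in_reclaim_context && i->no_delalloc_flush)
                            continue;  /* would deadlock on its locked range */
                    flushed++;         /* stand-in for actually flushing */
            }
            return flushed;
    }

    int main(void)
    {
            struct model_inode c = { 1, 0 }, b = { 0, &c }, a = { 0, &b };

            printf("flushed %d of 3\n", flush_list(&a, 1)); /* flushed 2 of 3 */
            return 0;
    }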
@@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int
         case BTRFS_IOC_SYNC: {
                 int ret;

-                ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
+                ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
                 if (ret)
                         return ret;
                 ret = btrfs_sync_fs(inode->i_sb, 1);
@@ -3190,6 +3190,12 @@ out:
         return ret;
 }

+static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
+{
+        return btrfs_fs_closing(fs_info) ||
+                test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+
 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 {
         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
@@ -3198,6 +3204,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
         struct btrfs_trans_handle *trans = NULL;
         int err = -ENOMEM;
         int ret = 0;
+        bool stopped = false;

         path = btrfs_alloc_path();
         if (!path)
@@ -3210,7 +3217,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
         path->skip_locking = 1;

         err = 0;
-        while (!err && !btrfs_fs_closing(fs_info)) {
+        while (!err && !(stopped = rescan_should_stop(fs_info))) {
                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
                 if (IS_ERR(trans)) {
                         err = PTR_ERR(trans);
@@ -3253,7 +3260,7 @@ out:
         }

         mutex_lock(&fs_info->qgroup_rescan_lock);
-        if (!btrfs_fs_closing(fs_info))
+        if (!stopped)
                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
         if (trans) {
                 ret = update_qgroup_status_item(trans);
@@ -3272,7 +3279,7 @@ out:

         btrfs_end_transaction(trans);

-        if (btrfs_fs_closing(fs_info)) {
+        if (stopped) {
                 btrfs_info(fs_info, "qgroup scan paused");
         } else if (err >= 0) {
                 btrfs_info(fs_info, "qgroup scan completed%s",
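Capturing the stop condition in a variable is what lets the code after the loop tell a paused scan from a completed one without re-testing a condition that may have changed since. A small standalone sketch of that control flow, not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    static bool closing, remounting;

    static bool rescan_should_stop(void)
    {
            return closing || remounting;
    }

    int main(void)
    {
            bool stopped = false;
            int err = 0, work = 5;

            while (!err && !(stopped = rescan_should_stop())) {
                    if (--work == 0)
                            break;            /* scan finished */
                    if (work == 2)
                            remounting = true; /* simulate a concurrent RO remount */
            }

            if (stopped)
                    printf("qgroup scan paused\n");   /* leave the rescan flag set */
            else
                    printf("qgroup scan completed\n");
            return 0;
    }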
@@ -3530,16 +3537,6 @@ static int try_flush_qgroup(struct btrfs_root *root)
         int ret;
         bool can_commit = true;

-        /*
-         * We don't want to run flush again and again, so if there is a running
-         * one, we won't try to start a new flush, but exit directly.
-         */
-        if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
-                wait_event(root->qgroup_flush_wait,
-                        !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
-                return 0;
-        }
-
         /*
          * If current process holds a transaction, we shouldn't flush, as we
          * assume all space reservation happens before a transaction handle is
@@ -3554,6 +3551,26 @@ static int try_flush_qgroup(struct btrfs_root *root)
             current->journal_info != BTRFS_SEND_TRANS_STUB)
                 can_commit = false;

+        /*
+         * We don't want to run flush again and again, so if there is a running
+         * one, we won't try to start a new flush, but exit directly.
+         */
+        if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
+                /*
+                 * We are already holding a transaction, thus we can block other
+                 * threads from flushing. So exit right now. This increases
+                 * the chance of EDQUOT for heavy load and near limit cases.
+                 * But we can argue that if we're already near limit, EDQUOT is
+                 * unavoidable anyway.
+                 */
+                if (!can_commit)
+                        return 0;
+
+                wait_event(root->qgroup_flush_wait,
+                        !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
+                return 0;
+        }
+
         ret = btrfs_start_delalloc_snapshot(root);
         if (ret < 0)
                 goto out;
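The reordering matters because a transaction holder that waits on someone else's flush can deadlock: that flush may need to join the very transaction the waiter holds open. A userspace sketch of the gate, not btrfs code; the busy-wait is a crude stand-in for wait_event():

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool flushing;

    static int try_flush(bool holds_transaction)
    {
            if (atomic_exchange(&flushing, true)) {
                    /* a flush is already running */
                    if (holds_transaction)
                            return 0;  /* waiting here could deadlock */
                    while (atomic_load(&flushing))
                            ;          /* stand-in for wait_event() */
                    return 0;
            }
            /* we own the flush: write out delalloc, commit or end transaction */
            atomic_store(&flushing, false); /* wakes waiters in the real code */
            return 0;
    }

    int main(void)
    {
            return try_flush(false);
    }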
@@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
         if (ret)
                 goto out_unlock;

+        /*
+         * After dirtying the page our caller will need to start a transaction,
+         * and if we are low on metadata free space, that can cause flushing of
+         * delalloc for all inodes in order to get metadata space released.
+         * However we are holding the range locked for the whole duration of
+         * the clone/dedupe operation, so we may deadlock if that happens and no
+         * other task releases enough space. So mark this inode as not being
+         * possible to flush to avoid such deadlock. We will clear that flag
+         * when we finish cloning all extents, since a transaction is started
+         * after finding each extent to clone.
+         */
+        set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
+
         if (comp_type == BTRFS_COMPRESS_NONE) {
                 char *map;

@@ -549,6 +562,8 @@ process_slot:
 out:
         btrfs_free_path(path);
         kvfree(buf);
+        clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
+
         return ret;
 }

@@ -236,6 +236,7 @@ struct waiting_dir_move {
          * after this directory is moved, we can try to rmdir the ino rmdir_ino.
          */
         u64 rmdir_ino;
+        u64 rmdir_gen;
         bool orphanized;
 };

@@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 static struct waiting_dir_move *
 get_waiting_dir_move(struct send_ctx *sctx, u64 ino);

-static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
+static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);

 static int need_send_hole(struct send_ctx *sctx)
 {
@@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,

                 fs_path_reset(name);

-                if (is_waiting_for_rm(sctx, ino)) {
+                if (is_waiting_for_rm(sctx, ino, gen)) {
                         ret = gen_unique_name(sctx, ino, gen, name);
                         if (ret < 0)
                                 goto out;
@@ -2858,8 +2859,8 @@ out:
         return ret;
 }

-static struct orphan_dir_info *
-add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
+static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
+                                                   u64 dir_ino, u64 dir_gen)
 {
         struct rb_node **p = &sctx->orphan_dirs.rb_node;
         struct rb_node *parent = NULL;
@@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
         while (*p) {
                 parent = *p;
                 entry = rb_entry(parent, struct orphan_dir_info, node);
-                if (dir_ino < entry->ino) {
+                if (dir_ino < entry->ino)
                         p = &(*p)->rb_left;
-                } else if (dir_ino > entry->ino) {
+                else if (dir_ino > entry->ino)
                         p = &(*p)->rb_right;
-                } else {
+                else if (dir_gen < entry->gen)
+                        p = &(*p)->rb_left;
+                else if (dir_gen > entry->gen)
+                        p = &(*p)->rb_right;
+                else
                         return entry;
-                }
         }

         odi = kmalloc(sizeof(*odi), GFP_KERNEL);
         if (!odi)
                 return ERR_PTR(-ENOMEM);
         odi->ino = dir_ino;
-        odi->gen = 0;
+        odi->gen = dir_gen;
         odi->last_dir_index_offset = 0;

         rb_link_node(&odi->node, parent, p);
@@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
         return odi;
 }

-static struct orphan_dir_info *
-get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
+static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
+                                                   u64 dir_ino, u64 gen)
 {
         struct rb_node *n = sctx->orphan_dirs.rb_node;
         struct orphan_dir_info *entry;
@@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
                         n = n->rb_left;
                 else if (dir_ino > entry->ino)
                         n = n->rb_right;
+                else if (gen < entry->gen)
+                        n = n->rb_left;
+                else if (gen > entry->gen)
+                        n = n->rb_right;
                 else
                         return entry;
         }
         return NULL;
 }

-static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
+static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
 {
-        struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
+        struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);

         return odi != NULL;
 }
@@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
         key.type = BTRFS_DIR_INDEX_KEY;
         key.offset = 0;

-        odi = get_orphan_dir_info(sctx, dir);
+        odi = get_orphan_dir_info(sctx, dir, dir_gen);
         if (odi)
                 key.offset = odi->last_dir_index_offset;

@@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,

                 dm = get_waiting_dir_move(sctx, loc.objectid);
                 if (dm) {
-                        odi = add_orphan_dir_info(sctx, dir);
+                        odi = add_orphan_dir_info(sctx, dir, dir_gen);
                         if (IS_ERR(odi)) {
                                 ret = PTR_ERR(odi);
                                 goto out;
@@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
                         odi->gen = dir_gen;
                         odi->last_dir_index_offset = found_key.offset;
                         dm->rmdir_ino = dir;
+                        dm->rmdir_gen = dir_gen;
                         ret = 0;
                         goto out;
                 }

                 if (loc.objectid > send_progress) {
-                        odi = add_orphan_dir_info(sctx, dir);
+                        odi = add_orphan_dir_info(sctx, dir, dir_gen);
                         if (IS_ERR(odi)) {
                                 ret = PTR_ERR(odi);
                                 goto out;
@@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
                 return -ENOMEM;
         dm->ino = ino;
         dm->rmdir_ino = 0;
+        dm->rmdir_gen = 0;
         dm->orphanized = orphanized;

         while (*p) {
@@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name,
         while (ino != BTRFS_FIRST_FREE_OBJECTID) {
                 fs_path_reset(name);

-                if (is_waiting_for_rm(sctx, ino))
+                if (is_waiting_for_rm(sctx, ino, gen))
                         break;
                 if (is_waiting_for_move(sctx, ino)) {
                         if (*ancestor_ino == 0)
@@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         u64 parent_ino, parent_gen;
         struct waiting_dir_move *dm = NULL;
         u64 rmdir_ino = 0;
+        u64 rmdir_gen;
         u64 ancestor;
         bool is_orphan;
         int ret;
@@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         dm = get_waiting_dir_move(sctx, pm->ino);
         ASSERT(dm);
         rmdir_ino = dm->rmdir_ino;
+        rmdir_gen = dm->rmdir_gen;
         is_orphan = dm->orphanized;
         free_waiting_dir_move(sctx, dm);

@@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                         dm = get_waiting_dir_move(sctx, pm->ino);
                         ASSERT(dm);
                         dm->rmdir_ino = rmdir_ino;
+                        dm->rmdir_gen = rmdir_gen;
                 }
                 goto out;
         }
@@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                 struct orphan_dir_info *odi;
                 u64 gen;

-                odi = get_orphan_dir_info(sctx, rmdir_ino);
+                odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
                 if (!odi) {
                         /* already deleted */
                         goto finish;
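All of these send changes hang off one data-structure decision: the orphan-dir rbtree is now keyed by (ino, gen) instead of ino alone, so a reused inode number with a different generation no longer matches a stale entry. The two-level comparison, as a standalone sketch rather than btrfs code:

    #include <stdio.h>

    struct model_odi { unsigned long long ino, gen; };

    /* order by ino first, then by generation */
    static int odi_cmp(const struct model_odi *a, const struct model_odi *b)
    {
            if (a->ino < b->ino) return -1;
            if (a->ino > b->ino) return 1;
            if (a->gen < b->gen) return -1;
            if (a->gen > b->gen) return 1;
            return 0;
    }

    int main(void)
    {
            struct model_odi old = { 260, 1 }, reused = { 260, 2 };

            /* same inode number, different generation: distinct tree entries */
            printf("%d\n", odi_cmp(&old, &reused)); /* -1, not a match */
            return 0;
    }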
@@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,

         loops = 0;
         while ((delalloc_bytes || dio_bytes) && loops < 3) {
-                btrfs_start_delalloc_roots(fs_info, items);
+                btrfs_start_delalloc_roots(fs_info, items, true);

                 loops++;
                 if (wait_ordered && !trans) {
@@ -175,7 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
         btrfs_discard_stop(fs_info);

         /* btrfs handle error by forcing the filesystem readonly */
-        sb->s_flags |= SB_RDONLY;
+        btrfs_set_sb_rdonly(sb);
         btrfs_info(fs_info, "forced readonly");
         /*
          * Note that a running device replace operation is not canceled here
@@ -1953,7 +1953,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                 /* avoid complains from lockdep et al. */
                 up(&fs_info->uuid_tree_rescan_sem);

-                sb->s_flags |= SB_RDONLY;
+                btrfs_set_sb_rdonly(sb);

                 /*
                  * Setting SB_RDONLY will put the cleaner thread to
@@ -1964,10 +1964,42 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                  */
                 btrfs_delete_unused_bgs(fs_info);

+                /*
+                 * The cleaner task could be already running before we set the
+                 * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock).
+                 * We must make sure that after we finish the remount, i.e. after
+                 * we call btrfs_commit_super(), the cleaner can no longer start
+                 * a transaction - either because it was dropping a dead root,
+                 * running delayed iputs or deleting an unused block group (the
+                 * cleaner picked a block group from the list of unused block
+                 * groups before we were able to in the previous call to
+                 * btrfs_delete_unused_bgs()).
+                 */
+                wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
+                            TASK_UNINTERRUPTIBLE);
+
+                /*
+                 * We've set the superblock to RO mode, so we might have made
+                 * the cleaner task sleep without running all pending delayed
+                 * iputs. Go through all the delayed iputs here, so that if an
+                 * unmount happens without remounting RW we don't end up at
+                 * finishing close_ctree() with a non-empty list of delayed
+                 * iputs.
+                 */
+                btrfs_run_delayed_iputs(fs_info);
+
                 btrfs_dev_replace_suspend_for_unmount(fs_info);
                 btrfs_scrub_cancel(fs_info);
                 btrfs_pause_balance(fs_info);

+                /*
+                 * Pause the qgroup rescan worker if it is running. We don't want
+                 * it to be still running after we are in RO mode, as after that,
+                 * by the time we unmount, it might have left a transaction open,
+                 * so we would leak the transaction and/or crash.
+                 */
+                btrfs_qgroup_wait_for_completion(fs_info, false);
+
                 ret = btrfs_commit_super(fs_info);
                 if (ret)
                         goto restore;
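The remount sequence above is: publish the RO state, wait out a cleaner pass that may already be in flight, then drain the work the now-sleeping cleaner would have done. A pthreads sketch of that ordering, not kernel code; the condition variable stands in for wait_on_bit()/clear_and_wake_up_bit():

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool cleaner_running, fs_ro;

    static void *cleaner(void *arg)
    {
            /* ... drop dead roots, run delayed iputs, delete unused bgs ... */
            pthread_mutex_lock(&lock);
            cleaner_running = false;      /* models clear_and_wake_up_bit() */
            pthread_cond_broadcast(&cond);
            pthread_mutex_unlock(&lock);
            return arg;
    }

    int main(void)
    {
            pthread_t t;

            cleaner_running = true;       /* a pass was already in flight */
            pthread_create(&t, NULL, cleaner, NULL);

            fs_ro = true;                 /* models btrfs_set_sb_rdonly() */
            pthread_mutex_lock(&lock);
            while (cleaner_running)       /* models wait_on_bit() */
                    pthread_cond_wait(&cond, &lock);
            pthread_mutex_unlock(&lock);

            /* only now: run remaining delayed iputs, then commit the super */
            if (fs_ro)
                    printf("cleaner quiesced, safe to commit super\n");
            pthread_join(t, NULL);
            return 0;
    }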
@@ -2006,7 +2038,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                 if (ret)
                         goto restore;

-                sb->s_flags &= ~SB_RDONLY;
+                btrfs_clear_sb_rdonly(sb);

                 set_bit(BTRFS_FS_OPEN, &fs_info->flags);
         }
@@ -2028,6 +2060,8 @@ restore:
         /* We've hit an error - don't reset SB_RDONLY */
         if (sb_rdonly(sb))
                 old_flags |= SB_RDONLY;
+        if (!(old_flags & SB_RDONLY))
+                clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
         sb->s_flags = old_flags;
         fs_info->mount_opt = old_opts;
         fs_info->compress_type = old_compress_type;
@@ -55,8 +55,14 @@ struct inode *btrfs_new_test_inode(void)
         struct inode *inode;

         inode = new_inode(test_mnt->mnt_sb);
-        if (inode)
-                inode_init_owner(inode, NULL, S_IFREG);
+        if (!inode)
+                return NULL;
+
+        inode->i_mode = S_IFREG;
+        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
+        BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
+        BTRFS_I(inode)->location.offset = 0;
+        inode_init_owner(inode, NULL, S_IFREG);

         return inode;
 }
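The shape of the fix, as a hedged userspace sketch (not kernel code): fail early on allocation, then perform every field initialization inside the one helper, which is what allows the duplicated setup to be deleted from the two tests in the hunks that follow:

    #include <stdio.h>
    #include <stdlib.h>

    struct model_inode { int mode; unsigned long long objectid; };

    static struct model_inode *new_test_inode(void)
    {
            struct model_inode *inode = calloc(1, sizeof(*inode));

            if (!inode)
                    return NULL;        /* bail out before touching fields */

            inode->mode = 0100000;      /* S_IFREG */
            inode->objectid = 256;      /* BTRFS_FIRST_FREE_OBJECTID */
            return inode;               /* fully initialized for every caller */
    }

    int main(void)
    {
            struct model_inode *inode = new_test_inode();

            if (!inode)
                    return 1;
            printf("objectid %llu\n", inode->objectid);
            free(inode);
            return 0;
    }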
@@ -232,11 +232,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
                 return ret;
         }

-        inode->i_mode = S_IFREG;
-        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-        BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
-        BTRFS_I(inode)->location.offset = 0;
-
         fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
         if (!fs_info) {
                 test_std_err(TEST_ALLOC_FS_INFO);
@@ -835,10 +830,6 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
                 return ret;
         }

-        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
-        BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
-        BTRFS_I(inode)->location.offset = 0;
-
         fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
         if (!fs_info) {
                 test_std_err(TEST_ALLOC_FS_INFO);
@@ -2592,7 +2592,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
         set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);

         if (seeding_dev) {
-                sb->s_flags &= ~SB_RDONLY;
+                btrfs_clear_sb_rdonly(sb);
                 ret = btrfs_prepare_sprout(fs_info);
                 if (ret) {
                         btrfs_abort_transaction(trans, ret);
@@ -2728,7 +2728,7 @@ error_sysfs:
         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
 error_trans:
         if (seeding_dev)
-                sb->s_flags |= SB_RDONLY;
+                btrfs_set_sb_rdonly(sb);
         if (trans)
                 btrfs_end_transaction(trans);
 error_free_zone: