for-5.17-rc2-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmH9eUcACgkQxWXV+ddt
WDvCvQ//bANu7air/Og5r2Mn0ZYyrcQl+yDYE75UC/tzTZNNtP8guwGllwlcsA0v
RQPiuFFtvjKMgKP6Eo1mVeUPkpX83VQkT+sqFRsFEFxazIXnSvEJ+iHVcuiZvgj1
VkTjdt7/mLb573zSA0MLhJqK1BBuFhUTCCHFHlCLoiYeekPAui1pbUC4LAE/+ksU
veCn9YS+NGkDpIv/b9mcALVBe+XkZlmw1LON8ArEbpY4ToafRk0qZfhV7CvyRbSP
Y1zLUScNLHGoR2WA1WhwlwuMePdhgX/8neGNiXsiw3WnmZhFoUVX7oUa6IWnKkKk
dD+x5Z3Z2xBQGK8djyqxzUFJ2VAvz15xGIM452ofGa1BJFZgV9hjPA6Y4RFdWx63
4AZ6OJwhrXhgMtWBhRtM6mGeje56ljwaxku9qhe585z8H5V8ezUNwWVkjY0bLKsd
iT3bUHEReoYRWuyszI1ZSm1DbyzNY2943ly97p/j8qKp4SHX39/QYAKmnuuHdIup
TnTBJOh38rj4S8BfF873aKAo7EfLJcDbTcZ1ivbuX5FeByRuQB4F0c1RRi4usMLc
DL5mhDhT71U1l/LF3IANQ4ieUfZbeFHd+dAVkYsGkYzzaWL8E03L582l/fqaVGsp
RaVpiuKnh2cyDXUxob8IYT5mZ/saa96xBSK8VEqnwjNEQCzKEeU=
=5MJl
-----END PGP SIGNATURE-----

Merge tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few fixes and error handling improvements:

  - fix deadlock between quota disable and qgroup rescan worker

  - fix use-after-free after failure to create a snapshot

  - skip warning on unmount after log cleanup failure

  - don't start transaction for scrub if the fs is mounted read-only

  - tree checker verifies item sizes"

* tag 'for-5.17-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: skip reserved bytes warning on unmount after log cleanup failure
  btrfs: fix use of uninitialized variable at rm device ioctl
  btrfs: fix use-after-free after failure to create a snapshot
  btrfs: tree-checker: check item_size for dev_item
  btrfs: tree-checker: check item_size for inode_item
  btrfs: fix deadlock between quota disable and qgroup rescan worker
  btrfs: don't start transaction for scrub if the fs is mounted read-only
commit 86286e486c
fs/btrfs/block-group.c
@@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
 {
         if (refcount_dec_and_test(&cache->refs)) {
                 WARN_ON(cache->pinned > 0);
-                WARN_ON(cache->reserved > 0);
+                /*
+                 * If there was a failure to cleanup a log tree, very likely due
+                 * to an IO failure on a writeback attempt of one or more of its
+                 * extent buffers, we could not do proper (and cheap) unaccounting
+                 * of their reserved space, so don't warn on reserved > 0 in that
+                 * case.
+                 */
+                if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                    !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info))
+                        WARN_ON(cache->reserved > 0);
 
                 /*
                  * A block_group shouldn't be on the discard_list anymore.
@@ -2544,6 +2553,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
         int ret;
         bool dirty_bg_running;
 
+        /*
+         * This can only happen when we are doing read-only scrub on read-only
+         * mount.
+         * In that case we should not start a new transaction on read-only fs.
+         * Thus here we skip all chunk allocations.
+         */
+        if (sb_rdonly(fs_info->sb)) {
+                mutex_lock(&fs_info->ro_block_group_mutex);
+                ret = inc_block_group_ro(cache, 0);
+                mutex_unlock(&fs_info->ro_block_group_mutex);
+                return ret;
+        }
+
         do {
                 trans = btrfs_join_transaction(root);
                 if (IS_ERR(trans))
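Note: the shape of this fix is a plain up-front guard: detect the read-only condition first and take a transaction-free path. A trivial, hedged userspace sketch of that guard pattern (all names hypothetical, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for sb_rdonly(fs_info->sb). */
static bool fs_is_readonly = true;

static int mark_ro_without_transaction(void)
{
        return 0;       /* flip the block group read-only, no transaction */
}

static int mark_ro_with_transaction(void)
{
        puts("joining a transaction (a bug on a read-only fs)");
        return 0;
}

int main(void)
{
        int ret;

        if (fs_is_readonly)     /* read-only scrub on a read-only mount */
                ret = mark_ro_without_transaction();
        else
                ret = mark_ro_with_transaction();
        printf("ret=%d\n", ret);
        return 0;
}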
@@ -3974,9 +3996,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                  * important and indicates a real bug if this happens.
                  */
                 if (WARN_ON(space_info->bytes_pinned > 0 ||
-                            space_info->bytes_reserved > 0 ||
                             space_info->bytes_may_use > 0))
                         btrfs_dump_space_info(info, space_info, 0, 0);
+
+                /*
+                 * If there was a failure to cleanup a log tree, very likely due
+                 * to an IO failure on a writeback attempt of one or more of its
+                 * extent buffers, we could not do proper (and cheap) unaccounting
+                 * of their reserved space, so don't warn on bytes_reserved > 0 in
+                 * that case.
+                 */
+                if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+                    !BTRFS_FS_LOG_CLEANUP_ERROR(info)) {
+                        if (WARN_ON(space_info->bytes_reserved > 0))
+                                btrfs_dump_space_info(info, space_info, 0, 0);
+                }
+
                 WARN_ON(space_info->reclaim_size > 0);
                 list_del(&space_info->list);
                 btrfs_sysfs_remove_space_info(space_info);
fs/btrfs/ctree.h
@@ -145,6 +145,9 @@ enum {
         BTRFS_FS_STATE_DUMMY_FS_INFO,
 
         BTRFS_FS_STATE_NO_CSUMS,
+
+        /* Indicates there was an error cleaning up a log tree. */
+        BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
 };
 
 #define BTRFS_BACKREF_REV_MAX   256
@@ -3593,6 +3596,9 @@ do { \
 
 #define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
                                                    &(fs_info)->fs_state)))
+#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
+        (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
+                           &(fs_info)->fs_state)))
 
 __printf(5, 6)
 __cold
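Note: the new macro mirrors BTRFS_FS_ERROR — a sticky bit in fs_info->fs_state, set once on failure and queried with test_bit() to gate later warnings. As a hedged illustration only, here is a minimal userspace analogue of that flag-gating pattern (plain bit ops instead of the kernel's atomic set_bit/test_bit; all names hypothetical):

#include <stdio.h>

/* Hypothetical state bits, mirroring the fs_state enum above. */
enum { FS_STATE_ERROR, FS_STATE_LOG_CLEANUP_ERROR };

static unsigned long fs_state;

static void set_state_bit(int bit)  { fs_state |= 1UL << bit; }
static int  test_state_bit(int bit) { return !!(fs_state & (1UL << bit)); }

int main(void)
{
        /* A log tree cleanup failure records a sticky state bit... */
        set_state_bit(FS_STATE_LOG_CLEANUP_ERROR);

        /* ...which later gates the unmount-time warning. */
        long reserved = 4096;
        if (!test_state_bit(FS_STATE_LOG_CLEANUP_ERROR) && reserved > 0)
                fprintf(stderr, "WARN: reserved > 0 on unmount\n");
        else
                puts("reserved-bytes warning skipped after log cleanup error");
        return 0;
}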
fs/btrfs/ioctl.c
@@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                 goto fail;
         }
 
-        spin_lock(&fs_info->trans_lock);
-        list_add(&pending_snapshot->list,
-                 &trans->transaction->pending_snapshots);
-        spin_unlock(&fs_info->trans_lock);
+        trans->pending_snapshot = pending_snapshot;
 
         ret = btrfs_commit_transaction(trans);
         if (ret)
@@ -3354,7 +3351,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
         struct block_device *bdev = NULL;
         fmode_t mode;
         int ret;
-        bool cancel;
+        bool cancel = false;
 
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
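Note: the one-character fix matters because error paths in this ioctl can reach cleanup code that consults `cancel` before any statement has assigned it, which is undefined behavior. A minimal, hedged sketch of that bug shape (hypothetical names, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

/* On failure, returns nonzero and never writes *cancel. */
static int parse_args(int input, bool *cancel)
{
        if (input < 0)
                return -1;
        *cancel = (input == 0);
        return 0;
}

int main(void)
{
        bool cancel = false;    /* the fix: a well-defined default */

        if (parse_args(-1, &cancel) != 0 && cancel)
                puts("error path acting on a garbage flag");
        printf("cancel=%d\n", cancel);
        return 0;
}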
fs/btrfs/qgroup.c
@@ -1185,9 +1185,24 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
         struct btrfs_trans_handle *trans = NULL;
         int ret = 0;
 
+        /*
+         * We need to have subvol_sem write locked, to prevent races between
+         * concurrent tasks trying to disable quotas, because we will unlock
+         * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
+         */
+        lockdep_assert_held_write(&fs_info->subvol_sem);
+
         mutex_lock(&fs_info->qgroup_ioctl_lock);
         if (!fs_info->quota_root)
                 goto out;
+
+        /*
+         * Request qgroup rescan worker to complete and wait for it. This wait
+         * must be done before transaction start for quota disable since it may
+         * deadlock with transaction by the qgroup rescan worker.
+         */
+        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
+        btrfs_qgroup_wait_for_completion(fs_info, false);
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
 
         /*
@@ -1205,14 +1220,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
         if (IS_ERR(trans)) {
                 ret = PTR_ERR(trans);
                 trans = NULL;
+                set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
                 goto out;
         }
 
         if (!fs_info->quota_root)
                 goto out;
 
-        clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-        btrfs_qgroup_wait_for_completion(fs_info, false);
         spin_lock(&fs_info->qgroup_lock);
         quota_root = fs_info->quota_root;
         fs_info->quota_root = NULL;
@@ -3383,6 +3397,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                         btrfs_warn(fs_info,
                         "qgroup rescan init failed, qgroup is not enabled");
                         ret = -EINVAL;
+                } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
+                        /* Quota disable is in progress */
+                        ret = -EBUSY;
                 }
 
                 if (ret) {
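Note: the crux of the deadlock fix is ordering. The rescan worker itself runs inside a transaction, so quota disable must wait for the worker to complete before starting its own transaction (and qgroup_rescan_init now refuses with -EBUSY while a disable is underway). A deliberately simplified pthread sketch of that ordering, with the transaction modeled as a plain mutex (hypothetical, not the kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t transaction = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the qgroup rescan worker: it runs inside a transaction. */
static void *rescan_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&transaction);
        puts("rescan worker: doing work inside a transaction");
        pthread_mutex_unlock(&transaction);
        return NULL;
}

int main(void)
{
        pthread_t worker;

        pthread_create(&worker, NULL, rescan_worker, NULL);

        /*
         * The fix's ordering: wait for the rescan worker to finish BEFORE
         * entering our own transaction. Waiting for it while already
         * holding the transaction is the inversion the patch removes.
         */
        pthread_join(worker, NULL);

        pthread_mutex_lock(&transaction);
        puts("quota disable: committing with no worker pending");
        pthread_mutex_unlock(&transaction);
        return 0;
}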
fs/btrfs/transaction.c
@@ -2000,6 +2000,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
                 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
 }
 
+/*
+ * Add a pending snapshot associated with the given transaction handle to the
+ * respective handle. This must be called after the transaction commit started
+ * and while holding fs_info->trans_lock.
+ * This serves to guarantee a caller of btrfs_commit_transaction() that it can
+ * safely free the pending snapshot pointer in case btrfs_commit_transaction()
+ * returns an error.
+ */
+static void add_pending_snapshot(struct btrfs_trans_handle *trans)
+{
+        struct btrfs_transaction *cur_trans = trans->transaction;
+
+        if (!trans->pending_snapshot)
+                return;
+
+        lockdep_assert_held(&trans->fs_info->trans_lock);
+        ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
+
+        list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -2073,6 +2094,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
                 enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
 
+                add_pending_snapshot(trans);
+
                 spin_unlock(&fs_info->trans_lock);
                 refcount_inc(&cur_trans->use_count);
 
@@ -2163,6 +2186,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
          * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
          */
         spin_lock(&fs_info->trans_lock);
+        add_pending_snapshot(trans);
         cur_trans->state = TRANS_STATE_COMMIT_DOING;
         spin_unlock(&fs_info->trans_lock);
         wait_event(cur_trans->writer_wait,
fs/btrfs/transaction.h
@@ -123,6 +123,8 @@ struct btrfs_trans_handle {
         struct btrfs_transaction *transaction;
         struct btrfs_block_rsv *block_rsv;
         struct btrfs_block_rsv *orig_rsv;
+        /* Set by a task that wants to create a snapshot. */
+        struct btrfs_pending_snapshot *pending_snapshot;
         refcount_t use_count;
         unsigned int type;
         /*
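Note: read together, the snapshot changes move the point where ownership of the pending snapshot transfers. The caller now parks it on its own transaction handle, and add_pending_snapshot() publishes it to the shared list only after the commit has actually started, so any earlier failure leaves the caller free to release it. A minimal userspace sketch of this handoff pattern (hypothetical types, not the kernel implementation):

#include <stdio.h>
#include <stdlib.h>

struct pending { struct pending *next; };
struct txn     { struct pending *list; };
struct handle  { struct txn *txn; struct pending *pending; };

/* Publish the handle's pending item only once the commit has started. */
static void add_pending(struct handle *h)
{
        if (!h->pending)
                return;
        h->pending->next = h->txn->list;
        h->txn->list = h->pending;
}

static int commit(struct handle *h, int fail_before_start)
{
        if (fail_before_start)
                return -1;      /* the pending item was never published */
        add_pending(h);         /* ownership moves to the transaction */
        return 0;
}

int main(void)
{
        struct txn t = { NULL };
        struct handle h = { &t, malloc(sizeof(struct pending)) };

        if (commit(&h, 1) != 0)
                free(h.pending);        /* safe: still exclusively ours */
        puts("no use-after-free on the commit error path");
        return 0;
}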
fs/btrfs/tree-checker.c
@@ -965,6 +965,7 @@ static int check_dev_item(struct extent_buffer *leaf,
                           struct btrfs_key *key, int slot)
 {
         struct btrfs_dev_item *ditem;
+        const u32 item_size = btrfs_item_size(leaf, slot);
 
         if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
                 dev_item_err(leaf, slot,
@@ -972,6 +973,13 @@ static int check_dev_item(struct extent_buffer *leaf,
                              key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
                 return -EUCLEAN;
         }
+
+        if (unlikely(item_size != sizeof(*ditem))) {
+                dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
+                             item_size, sizeof(*ditem));
+                return -EUCLEAN;
+        }
+
         ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
         if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
                 dev_item_err(leaf, slot,
@@ -1007,6 +1015,7 @@ static int check_inode_item(struct extent_buffer *leaf,
         struct btrfs_inode_item *iitem;
         u64 super_gen = btrfs_super_generation(fs_info->super_copy);
         u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
+        const u32 item_size = btrfs_item_size(leaf, slot);
         u32 mode;
         int ret;
         u32 flags;
@@ -1016,6 +1025,12 @@ static int check_inode_item(struct extent_buffer *leaf,
         if (unlikely(ret < 0))
                 return ret;
 
+        if (unlikely(item_size != sizeof(*iitem))) {
+                generic_err(leaf, slot, "invalid item size: has %u expect %zu",
+                            item_size, sizeof(*iitem));
+                return -EUCLEAN;
+        }
+
         iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
 
         /* Here we use super block generation + 1 to handle log tree */
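Note: both tree-checker additions are the same fixed-size validation idiom — before casting an on-disk item to a structure, confirm the item's recorded size matches the structure exactly, and reject the leaf as corrupt (-EUCLEAN) otherwise. A standalone, hedged sketch of the idiom (hypothetical structure, not btrfs's on-disk format):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical fixed-size on-disk item. */
struct disk_item {
        uint64_t id;
        uint64_t bytes;
};

/* Reject a buffer whose recorded size can't back a full disk_item. */
static int check_item_size(uint32_t item_size)
{
        if (item_size != sizeof(struct disk_item)) {
                fprintf(stderr, "invalid item size: has %u expect %zu\n",
                        item_size, sizeof(struct disk_item));
                return -1;      /* the kernel returns -EUCLEAN here */
        }
        return 0;
}

int main(void)
{
        printf("exact size: %d\n", check_item_size(sizeof(struct disk_item)));
        printf("truncated:  %d\n", check_item_size(7));
        return 0;
}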
fs/btrfs/tree-log.c
@@ -3414,6 +3414,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
         if (log->node) {
                 ret = walk_log_tree(trans, log, &wc);
                 if (ret) {
+                        /*
+                         * We weren't able to traverse the entire log tree, the
+                         * typical scenario is getting an -EIO when reading an
+                         * extent buffer of the tree, due to a previous writeback
+                         * failure of it.
+                         */
+                        set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+                                &log->fs_info->fs_state);
+
+                        /*
+                         * Some extent buffers of the log tree may still be dirty
+                         * and not yet written back to storage, because we may
+                         * have updates to a log tree without syncing a log tree,
+                         * such as during rename and link operations. So flush
+                         * them out and wait for their writeback to complete, so
+                         * that we properly cleanup their state and pages.
+                         */
+                        btrfs_write_marked_extents(log->fs_info,
+                                                   &log->dirty_log_pages,
+                                                   EXTENT_DIRTY | EXTENT_NEW);
+                        btrfs_wait_tree_log_extents(log,
+                                                    EXTENT_DIRTY | EXTENT_NEW);
+
                         if (trans)
                                 btrfs_abort_transaction(trans, ret);
                         else