Merge tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The updates this time are more under the hood and enhancing existing
features (subpage with compression and zoned namespaces).
Performance related:
- misc small inode logging improvements (+3% throughput, -11% latency
on sample dbench workload)
- more efficient directory logging: bulk item insertion, less tree
searches and locking
- speed up bulk insertion of items into a b-tree, which is used when
logging directories, when running delayed items for directories
(fsync and transaction commits) and when running the slow path
(full sync) of an fsync (bulk creation run time -4%, deletion -12%)
Core:
- continued subpage support
    - make defragmentation work
    - make compression write work
- zoned mode
    - support ZNS (zoned namespaces), zone capacity is number of
      usable blocks in each zone
    - add dedicated block group (zoned) for relocation, to prevent
      out of order writes in some cases
    - greedy block group reclaim, pick the ones with least usable
      space first
- preparatory work for send protocol updates
- error handling improvements
- cleanups and refactoring
Fixes:
- lockdep warnings
    - in show_devname callback, on seeding device
    - device delete on loop device due to conversions to workqueues
- fix deadlock between chunk allocation and chunk btree modifications
- fix tracking of missing device count and status"
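The "greedy block group reclaim" item above boils down to ordering the reclaim list by used bytes before processing it, so the emptiest block groups are relocated first (see reclaim_bgs_cmp() and the list_sort() call in the block-group.c hunks further down). The following is a minimal, self-contained userspace sketch of that ordering idea only; the struct, sample numbers and qsort() usage are hypothetical illustrations, not the kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Hypothetical stand-in for a block group: only the fields the sort needs. */
struct bg {
	uint64_t start;   /* logical start, for printing only */
	uint64_t used;    /* bytes used; reclaim wants the smallest first */
};

/*
 * qsort() needs a negative/zero/positive comparator; the kernel's list_sort()
 * comparator in reclaim_bgs_cmp() can get away with returning the boolean
 * bg1->used > bg2->used.
 */
static int cmp_used(const void *a, const void *b)
{
	const struct bg *bg1 = a, *bg2 = b;

	return (bg1->used > bg2->used) - (bg1->used < bg2->used);
}

int main(void)
{
	struct bg bgs[] = {
		{ .start = 1ULL << 30, .used = 900ULL << 20 },
		{ .start = 2ULL << 30, .used =  10ULL << 20 },
		{ .start = 3ULL << 30, .used = 300ULL << 20 },
	};
	size_t i, n = sizeof(bgs) / sizeof(bgs[0]);

	qsort(bgs, n, sizeof(bgs[0]), cmp_used);

	/* Emptiest block groups come out first, i.e. get reclaimed first. */
	for (i = 0; i < n; i++)
		printf("bg @%llu used=%llu MiB\n",
		       (unsigned long long)bgs[i].start,
		       (unsigned long long)(bgs[i].used >> 20));
	return 0;
}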
* tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (140 commits)
btrfs: remove root argument from check_item_in_log()
btrfs: remove root argument from add_link()
btrfs: remove root argument from btrfs_unlink_inode()
btrfs: remove root argument from drop_one_dir_item()
btrfs: clear MISSING device status bit in btrfs_close_one_device
btrfs: call btrfs_check_rw_degradable only if there is a missing device
btrfs: send: prepare for v2 protocol
btrfs: fix comment about sector sizes supported in 64K systems
btrfs: update device path inode time instead of bd_inode
fs: export an inode_update_time helper
btrfs: fix deadlock when defragging transparent huge pages
btrfs: sysfs: convert scnprintf and snprintf to sysfs_emit
btrfs: make btrfs_super_block size match BTRFS_SUPER_INFO_SIZE
btrfs: update comments for chunk allocation -ENOSPC cases
btrfs: fix deadlock between chunk allocation and chunk btree modifications
btrfs: zoned: use greedy gc for auto reclaim
btrfs: check-integrity: stop storing the block device name in btrfsic_dev_state
btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls
btrfs: add a btrfs_get_dev_args_from_path helper
btrfs: handle device lookup with btrfs_dev_lookup_args
...
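Several of the zoned-mode changes in the diff below account for the gap between a zone's capacity and the block group length; for example btrfs_dec_block_group_ro() now computes zone_unusable as (alloc_offset - used) + (length - zone_capacity). A small, self-contained arithmetic sketch of that accounting follows, with made-up sizes; only the variable names mirror the btrfs_block_group members shown in the diff, everything else is an assumption for illustration.

#include <stdio.h>
#include <stdint.h>

#define MIB (1024ULL * 1024ULL)

int main(void)
{
	/*
	 * Hypothetical zoned block group: 256 MiB long, but the underlying
	 * ZNS zone only exposes 192 MiB of usable capacity.
	 */
	uint64_t length        = 256 * MIB;
	uint64_t zone_capacity = 192 * MIB;
	uint64_t alloc_offset  =  96 * MIB;  /* bytes already appended */
	uint64_t used          =  64 * MIB;  /* bytes still referenced */

	/*
	 * Mirrors the updated calculation in btrfs_dec_block_group_ro():
	 * bytes written but no longer referenced, plus the tail of the
	 * block group that the zone can never hold.
	 */
	uint64_t zone_unusable = (alloc_offset - used) +
				 (length - zone_capacity);

	printf("zone_unusable = %llu MiB\n",
	       (unsigned long long)(zone_unusable / MIB));  /* prints 96 MiB */
	return 0;
}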
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/list_sort.h>
 #include "misc.h"
 #include "ctree.h"
 #include "block-group.h"
@@ -144,6 +145,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
 */
 WARN_ON(!RB_EMPTY_ROOT(&cache->full_stripe_locks_root.root));
 kfree(cache->free_space_ctl);
+kfree(cache->physical_map);
 kfree(cache);
 }
 }
@@ -902,6 +904,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 spin_unlock(&cluster->refill_lock);
 
 btrfs_clear_treelog_bg(block_group);
+btrfs_clear_data_reloc_bg(block_group);
 
 path = btrfs_alloc_path();
 if (!path) {
@@ -1484,6 +1487,21 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 spin_unlock(&fs_info->unused_bgs_lock);
 }
 
+/*
+* We want block groups with a low number of used bytes to be in the beginning
+* of the list, so they will get reclaimed first.
+*/
+static int reclaim_bgs_cmp(void *unused, const struct list_head *a,
+const struct list_head *b)
+{
+const struct btrfs_block_group *bg1, *bg2;
+
+bg1 = list_entry(a, struct btrfs_block_group, bg_list);
+bg2 = list_entry(b, struct btrfs_block_group, bg_list);
+
+return bg1->used > bg2->used;
+}
+
 void btrfs_reclaim_bgs_work(struct work_struct *work)
 {
 struct btrfs_fs_info *fs_info =
@@ -1508,6 +1526,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 }
 
 spin_lock(&fs_info->unused_bgs_lock);
+/*
+* Sort happens under lock because we can't simply splice it and sort.
+* The block groups might still be in use and reachable via bg_list,
+* and their presence in the reclaim_bgs list must be preserved.
+*/
+list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
 while (!list_empty(&fs_info->reclaim_bgs)) {
 u64 zone_unusable;
 int ret = 0;
@@ -1895,6 +1919,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 INIT_LIST_HEAD(&cache->discard_list);
 INIT_LIST_HEAD(&cache->dirty_list);
 INIT_LIST_HEAD(&cache->io_list);
+INIT_LIST_HEAD(&cache->active_bg_list);
 btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
 atomic_set(&cache->frozen, 0);
 mutex_init(&cache->free_space_lock);
@@ -2035,6 +2060,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 */
 if (btrfs_is_zoned(info)) {
 btrfs_calc_zone_unusable(cache);
+/* Should not have any excluded extents. Just in case, though. */
+btrfs_free_excluded_extents(cache);
 } else if (cache->length == cache->used) {
 cache->last_byte_to_unpin = (u64)-1;
 cache->cached = BTRFS_CACHE_FINISHED;
@@ -2062,15 +2089,18 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 link_block_group(cache);
 
 set_avail_alloc_bits(info, cache->flags);
-if (btrfs_chunk_readonly(info, cache->start)) {
-inc_block_group_ro(cache, 1);
-} else if (cache->used == 0) {
+if (btrfs_chunk_writeable(info, cache->start)) {
+if (cache->used == 0) {
 ASSERT(list_empty(&cache->bg_list));
 if (btrfs_test_opt(info, DISCARD_ASYNC))
 btrfs_discard_queue_work(&info->discard_ctl, cache);
 else
 btrfs_mark_bg_unused(cache);
 }
+} else {
+inc_block_group_ro(cache, 1);
+}
+
 return 0;
 error:
 btrfs_put_block_group(cache);
@@ -2438,6 +2468,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 return ERR_PTR(ret);
 }
 
+/*
+* New block group is likely to be used soon. Try to activate it now.
+* Failure is OK for now.
+*/
+btrfs_zone_activate(cache);
+
 ret = exclude_super_stripes(cache);
 if (ret) {
 /* We may have excluded something, so call this just in case */
@@ -2479,7 +2515,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
 */
 trace_btrfs_add_block_group(fs_info, cache, 1);
 btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
-cache->bytes_super, 0, &cache->space_info);
+cache->bytes_super, cache->zone_unusable,
+&cache->space_info);
 btrfs_update_global_block_rsv(fs_info);
 
 link_block_group(cache);
@@ -2594,7 +2631,9 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
 if (!--cache->ro) {
 if (btrfs_is_zoned(cache->fs_info)) {
 /* Migrate zone_unusable bytes back */
-cache->zone_unusable = cache->alloc_offset - cache->used;
+cache->zone_unusable =
+(cache->alloc_offset - cache->used) +
+(cache->length - cache->zone_capacity);
 sinfo->bytes_zone_unusable += cache->zone_unusable;
 sinfo->bytes_readonly -= cache->zone_unusable;
 }
@@ -3143,7 +3182,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
 }
 
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
-u64 bytenr, u64 num_bytes, int alloc)
+u64 bytenr, u64 num_bytes, bool alloc)
 {
 struct btrfs_fs_info *info = trans->fs_info;
 struct btrfs_block_group *cache = NULL;
@@ -3380,36 +3419,17 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
 */
 check_system_chunk(trans, flags);
 
-bg = btrfs_alloc_chunk(trans, flags);
+bg = btrfs_create_chunk(trans, flags);
 if (IS_ERR(bg)) {
 ret = PTR_ERR(bg);
 goto out;
 }
 
-/*
-* If this is a system chunk allocation then stop right here and do not
-* add the chunk item to the chunk btree. This is to prevent a deadlock
-* because this system chunk allocation can be triggered while COWing
-* some extent buffer of the chunk btree and while holding a lock on a
-* parent extent buffer, in which case attempting to insert the chunk
-* item (or update the device item) would result in a deadlock on that
-* parent extent buffer. In this case defer the chunk btree updates to
-* the second phase of chunk allocation and keep our reservation until
-* the second phase completes.
-*
-* This is a rare case and can only be triggered by the very few cases
-* we have where we need to touch the chunk btree outside chunk allocation
-* and chunk removal. These cases are basically adding a device, removing
-* a device or resizing a device.
-*/
-if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
-return 0;
-
 ret = btrfs_chunk_alloc_add_chunk_item(trans, bg);
 /*
 * Normally we are not expected to fail with -ENOSPC here, since we have
 * previously reserved space in the system space_info and allocated one
-* new system chunk if necessary. However there are two exceptions:
+* new system chunk if necessary. However there are three exceptions:
 *
 * 1) We may have enough free space in the system space_info but all the
 * existing system block groups have a profile which can not be used
@@ -3435,13 +3455,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags)
 * with enough free space got turned into RO mode by a running scrub,
 * and in this case we have to allocate a new one and retry. We only
 * need do this allocate and retry once, since we have a transaction
-* handle and scrub uses the commit root to search for block groups.
+* handle and scrub uses the commit root to search for block groups;
+*
+* 3) We had one system block group with enough free space when we called
+* check_system_chunk(), but after that, right before we tried to
+* allocate the last extent buffer we needed, a discard operation came
+* in and it temporarily removed the last free space entry from the
+* block group (discard removes a free space entry, discards it, and
+* then adds back the entry to the block group cache).
 */
 if (ret == -ENOSPC) {
 const u64 sys_flags = btrfs_system_alloc_profile(trans->fs_info);
 struct btrfs_block_group *sys_bg;
 
-sys_bg = btrfs_alloc_chunk(trans, sys_flags);
+sys_bg = btrfs_create_chunk(trans, sys_flags);
 if (IS_ERR(sys_bg)) {
 ret = PTR_ERR(sys_bg);
 btrfs_abort_transaction(trans, ret);
@@ -3519,7 +3546,15 @@ out:
 * properly, either intentionally or as a bug. One example where this is
 * done intentionally is fsync, as it does not reserve any transaction units
 * and ends up allocating a variable number of metadata extents for log
-* tree extent buffers.
+* tree extent buffers;
+*
+* 4) The task has reserved enough transaction units / metadata space, but right
+* before it tries to allocate the last extent buffer it needs, a discard
+* operation comes in and, temporarily, removes the last free space entry from
+* the only metadata block group that had free space (discard starts by
+* removing a free space entry from a block group, then does the discard
+* operation and, once it's done, it adds back the free space entry to the
+* block group).
 *
 * We also need this 2 phases setup when adding a device to a filesystem with
 * a seed device - we must create new metadata and system chunks without adding
@@ -3537,14 +3572,14 @@ out:
 * This has happened before and commit eafa4fd0ad0607 ("btrfs: fix exhaustion of
 * the system chunk array due to concurrent allocations") provides more details.
 *
-* For allocation of system chunks, we defer the updates and insertions into the
-* chunk btree to phase 2. This is to prevent deadlocks on extent buffers because
-* if the chunk allocation is triggered while COWing an extent buffer of the
-* chunk btree, we are holding a lock on the parent of that extent buffer and
-* doing the chunk btree updates and insertions can require locking that parent.
-* This is for the very few and rare cases where we update the chunk btree that
-* are not chunk allocation or chunk removal: adding a device, removing a device
-* or resizing a device.
+* Allocation of system chunks does not happen through this function. A task that
+* needs to update the chunk btree (the only btree that uses system chunks), must
+* preallocate chunk space by calling either check_system_chunk() or
+* btrfs_reserve_chunk_metadata() - the former is used when allocating a data or
+* metadata chunk or when removing a chunk, while the later is used before doing
+* a modification to the chunk btree - use cases for the later are adding,
+* removing and resizing a device as well as relocation of a system chunk.
+* See the comment below for more details.
 *
 * The reservation of system space, done through check_system_chunk(), as well
 * as all the updates and insertions into the chunk btree must be done while
@@ -3581,11 +3616,27 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 if (trans->allocating_chunk)
 return -ENOSPC;
 /*
-* If we are removing a chunk, don't re-enter or we would deadlock.
-* System space reservation and system chunk allocation is done by the
-* chunk remove operation (btrfs_remove_chunk()).
+* Allocation of system chunks can not happen through this path, as we
+* could end up in a deadlock if we are allocating a data or metadata
+* chunk and there is another task modifying the chunk btree.
+*
+* This is because while we are holding the chunk mutex, we will attempt
+* to add the new chunk item to the chunk btree or update an existing
+* device item in the chunk btree, while the other task that is modifying
+* the chunk btree is attempting to COW an extent buffer while holding a
+* lock on it and on its parent - if the COW operation triggers a system
+* chunk allocation, then we can deadlock because we are holding the
+* chunk mutex and we may need to access that extent buffer or its parent
+* in order to add the chunk item or update a device item.
+*
+* Tasks that want to modify the chunk tree should reserve system space
+* before updating the chunk btree, by calling either
+* btrfs_reserve_chunk_metadata() or check_system_chunk().
+* It's possible that after a task reserves the space, it still ends up
+* here - this happens in the cases described above at do_chunk_alloc().
+* The task will have to either retry or fail.
 */
-if (trans->removing_chunk)
+if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
 return -ENOSPC;
 
 space_info = btrfs_find_space_info(fs_info, flags);
@@ -3684,17 +3735,14 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
 return num_dev;
 }
 
-/*
-* Reserve space in the system space for allocating or removing a chunk
-*/
-void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+static void reserve_chunk_space(struct btrfs_trans_handle *trans,
+u64 bytes,
+u64 type)
 {
 struct btrfs_fs_info *fs_info = trans->fs_info;
 struct btrfs_space_info *info;
 u64 left;
-u64 thresh;
 int ret = 0;
-u64 num_devs;
 
 /*
 * Needed because we can end up allocating a system chunk and for an
@@ -3707,19 +3755,13 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
 left = info->total_bytes - btrfs_space_info_used(info, true);
 spin_unlock(&info->lock);
 
-num_devs = get_profile_num_devs(fs_info, type);
-
-/* num_devs device items to update and 1 chunk item to add or remove */
-thresh = btrfs_calc_metadata_size(fs_info, num_devs) +
-btrfs_calc_insert_metadata_size(fs_info, 1);
-
-if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+if (left < bytes && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
 btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
-left, thresh, type);
+left, bytes, type);
 btrfs_dump_space_info(fs_info, info, 0, 0);
 }
 
-if (left < thresh) {
+if (left < bytes) {
 u64 flags = btrfs_system_alloc_profile(fs_info);
 struct btrfs_block_group *bg;
 
@@ -3728,21 +3770,20 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
 * needing it, as we might not need to COW all nodes/leafs from
 * the paths we visit in the chunk tree (they were already COWed
 * or created in the current transaction for example).
-*
-* Also, if our caller is allocating a system chunk, do not
-* attempt to insert the chunk item in the chunk btree, as we
-* could deadlock on an extent buffer since our caller may be
-* COWing an extent buffer from the chunk btree.
 */
-bg = btrfs_alloc_chunk(trans, flags);
+bg = btrfs_create_chunk(trans, flags);
 if (IS_ERR(bg)) {
 ret = PTR_ERR(bg);
-} else if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
+} else {
 /*
 * If we fail to add the chunk item here, we end up
 * trying again at phase 2 of chunk allocation, at
 * btrfs_create_pending_block_groups(). So ignore
-* any error here.
+* any error here. An ENOSPC here could happen, due to
+* the cases described at do_chunk_alloc() - the system
+* block group we just created was just turned into RO
+* mode by a scrub for example, or a running discard
+* temporarily removed its free space entries, etc.
 */
 btrfs_chunk_alloc_add_chunk_item(trans, bg);
 }
@@ -3751,12 +3792,61 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
 if (!ret) {
 ret = btrfs_block_rsv_add(fs_info->chunk_root,
 &fs_info->chunk_block_rsv,
-thresh, BTRFS_RESERVE_NO_FLUSH);
+bytes, BTRFS_RESERVE_NO_FLUSH);
 if (!ret)
-trans->chunk_bytes_reserved += thresh;
+trans->chunk_bytes_reserved += bytes;
 }
 }
 
+/*
+* Reserve space in the system space for allocating or removing a chunk.
+* The caller must be holding fs_info->chunk_mutex.
+*/
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+{
+struct btrfs_fs_info *fs_info = trans->fs_info;
+const u64 num_devs = get_profile_num_devs(fs_info, type);
+u64 bytes;
+
+/* num_devs device items to update and 1 chunk item to add or remove. */
+bytes = btrfs_calc_metadata_size(fs_info, num_devs) +
+btrfs_calc_insert_metadata_size(fs_info, 1);
+
+reserve_chunk_space(trans, bytes, type);
+}
+
+/*
+* Reserve space in the system space, if needed, for doing a modification to the
+* chunk btree.
+*
+* @trans: A transaction handle.
+* @is_item_insertion: Indicate if the modification is for inserting a new item
+* in the chunk btree or if it's for the deletion or update
+* of an existing item.
+*
+* This is used in a context where we need to update the chunk btree outside
+* block group allocation and removal, to avoid a deadlock with a concurrent
+* task that is allocating a metadata or data block group and therefore needs to
+* update the chunk btree while holding the chunk mutex. After the update to the
+* chunk btree is done, btrfs_trans_release_chunk_metadata() should be called.
+*
+*/
+void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
+bool is_item_insertion)
+{
+struct btrfs_fs_info *fs_info = trans->fs_info;
+u64 bytes;
+
+if (is_item_insertion)
+bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
+else
+bytes = btrfs_calc_metadata_size(fs_info, 1);
+
+mutex_lock(&fs_info->chunk_mutex);
+reserve_chunk_space(trans, bytes, BTRFS_BLOCK_GROUP_SYSTEM);
+mutex_unlock(&fs_info->chunk_mutex);
+}
+
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
 {
 struct btrfs_block_group *block_group;
@@ -3833,6 +3923,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 }
 spin_unlock(&info->unused_bgs_lock);
 
+spin_lock(&info->zone_active_bgs_lock);
+while (!list_empty(&info->zone_active_bgs)) {
+block_group = list_first_entry(&info->zone_active_bgs,
+struct btrfs_block_group,
+active_bg_list);
+list_del_init(&block_group->active_bg_list);
+btrfs_put_block_group(block_group);
+}
+spin_unlock(&info->zone_active_bgs_lock);
+
 spin_lock(&info->block_group_cache_lock);
 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
 block_group = rb_entry(n, struct btrfs_block_group,
@@ -98,6 +98,7 @@ struct btrfs_block_group {
 unsigned int to_copy:1;
 unsigned int relocating_repair:1;
 unsigned int chunk_item_inserted:1;
+unsigned int zone_is_active:1;
 
 int disk_cache_state;
 
@@ -202,7 +203,10 @@ struct btrfs_block_group {
 */
 u64 alloc_offset;
 u64 zone_unusable;
+u64 zone_capacity;
 u64 meta_write_pointer;
+struct map_lookup *physical_map;
+struct list_head active_bg_list;
 };
 
 static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
@@ -280,7 +284,7 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
 int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
 int btrfs_update_block_group(struct btrfs_trans_handle *trans,
-u64 bytenr, u64 num_bytes, int alloc);
+u64 bytenr, u64 num_bytes, bool alloc);
 int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
 u64 ram_bytes, u64 num_bytes, int delalloc);
 void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
@@ -289,6 +293,8 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 enum btrfs_chunk_alloc_enum force);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
 void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
+void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
+bool is_item_insertion);
 u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
 void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
@@ -138,17 +138,34 @@ struct btrfs_inode {
 /* a local copy of root's last_log_commit */
 int last_log_commit;
 
-/* total number of bytes pending delalloc, used by stat to calc the
-* real block usage of the file
+union {
+/*
+* Total number of bytes pending delalloc, used by stat to
+* calculate the real block usage of the file. This is used
+* only for files.
 */
 u64 delalloc_bytes;
+/*
+* The offset of the last dir item key that was logged.
+* This is used only for directories.
+*/
+u64 last_dir_item_offset;
+};
 
+union {
 /*
 * Total number of bytes pending delalloc that fall within a file
 * range that is either a hole or beyond EOF (and no prealloc extent
-* exists in the range). This is always <= delalloc_bytes.
+* exists in the range). This is always <= delalloc_bytes and this
+* is used only for files.
 */
 u64 new_delalloc_bytes;
+/*
+* The offset of the last dir index key that was logged.
+* This is used only for directories.
+*/
+u64 last_dir_index_offset;
+};
 
 /*
 * total number of bytes pending defrag, used by stat to check whether
@@ -339,7 +356,12 @@ static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 
 struct btrfs_dio_private {
 struct inode *inode;
-u64 logical_offset;
+
+/*
+* Since DIO can use anonymous page, we cannot use page_offset() to
+* grab the file offset, thus need a dedicated member for file offset.
+*/
+u64 file_offset;
 u64 disk_bytenr;
 /* Used for bio::bi_size */
 u32 bytes;
@@ -186,7 +186,6 @@ struct btrfsic_dev_state {
 struct list_head collision_resolving_node; /* list node */
 struct btrfsic_block dummy_block_for_bio_bh_flush;
 u64 last_flush_gen;
-char name[BDEVNAME_SIZE];
 };
 
 struct btrfsic_block_hashtable {
@@ -403,7 +402,6 @@ static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
 ds->bdev = NULL;
 ds->state = NULL;
-ds->name[0] = '\0';
 INIT_LIST_HEAD(&ds->collision_resolving_node);
 ds->last_flush_gen = 0;
 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
@@ -756,10 +754,10 @@ static int btrfsic_process_superblock_dev_mirror(
 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
 btrfs_info_in_rcu(fs_info,
-"new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
+"new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)",
 superblock_bdev,
 rcu_str_deref(device->name), dev_bytenr,
-dev_state->name, dev_bytenr,
+dev_state->bdev, dev_bytenr,
 superblock_mirror_num);
 list_add(&superblock_tmp->all_blocks_node,
 &state->all_blocks_list);
@@ -938,9 +936,10 @@ continue_with_current_leaf_stack_frame:
 if (disk_item_offset + sizeof(struct btrfs_item) >
 sf->block_ctx->len) {
 leaf_item_out_of_bounce_error:
-pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
+pr_info(
+"btrfsic: leaf item out of bounce at logical %llu, dev %pg\n",
 sf->block_ctx->start,
-sf->block_ctx->dev->name);
+sf->block_ctx->dev->bdev);
 goto one_stack_frame_backwards;
 }
 btrfsic_read_from_block_data(sf->block_ctx,
@@ -1058,9 +1057,10 @@ continue_with_current_node_stack_frame:
 (uintptr_t)nodehdr;
 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
 sf->block_ctx->len) {
-pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n",
+pr_info(
+"btrfsic: node item out of bounce at logical %llu, dev %pg\n",
 sf->block_ctx->start,
-sf->block_ctx->dev->name);
+sf->block_ctx->dev->bdev);
 goto one_stack_frame_backwards;
 }
 btrfsic_read_from_block_data(
@@ -1228,15 +1228,17 @@ static int btrfsic_create_link_to_next_block(
 if (next_block->logical_bytenr != next_bytenr &&
 !(!next_block->is_metadata &&
 0 == next_block->logical_bytenr))
-pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
-next_bytenr, next_block_ctx->dev->name,
+pr_info(
+"referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
+next_bytenr, next_block_ctx->dev->bdev,
 next_block_ctx->dev_bytenr, *mirror_nump,
 btrfsic_get_block_type(state,
 next_block),
 next_block->logical_bytenr);
 else
-pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
-next_bytenr, next_block_ctx->dev->name,
+pr_info(
+"referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n",
+next_bytenr, next_block_ctx->dev->bdev,
 next_block_ctx->dev_bytenr, *mirror_nump,
 btrfsic_get_block_type(state,
 next_block));
@@ -1324,8 +1326,8 @@ static int btrfsic_handle_extent_data(
 if (file_extent_item_offset +
 offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
 block_ctx->len) {
-pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
-block_ctx->start, block_ctx->dev->name);
+pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
+block_ctx->start, block_ctx->dev->bdev);
 return -1;
 }
 
@@ -1344,8 +1346,8 @@ static int btrfsic_handle_extent_data(
 
 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
 block_ctx->len) {
-pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n",
-block_ctx->start, block_ctx->dev->name);
+pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n",
+block_ctx->start, block_ctx->dev->bdev);
 return -1;
 }
 btrfsic_read_from_block_data(block_ctx, &file_extent_item,
@@ -1421,9 +1423,10 @@ static int btrfsic_handle_extent_data(
 next_block->logical_bytenr != next_bytenr &&
 !(!next_block->is_metadata &&
 0 == next_block->logical_bytenr)) {
-pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n",
+pr_info(
+"referenced block @%llu (%pg/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu)\n",
 next_bytenr,
-next_block_ctx.dev->name,
+next_block_ctx.dev->bdev,
 next_block_ctx.dev_bytenr,
 mirror_num,
 next_block->logical_bytenr);
@@ -1455,7 +1458,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
 struct btrfs_fs_info *fs_info = state->fs_info;
 int ret;
 u64 length;
-struct btrfs_bio *multi = NULL;
+struct btrfs_io_context *multi = NULL;
 struct btrfs_device *device;
 
 length = len;
@@ -1561,7 +1564,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 struct bio *bio;
 unsigned int j;
 
-bio = btrfs_io_bio_alloc(num_pages - i);
+bio = btrfs_bio_alloc(num_pages - i);
 bio_set_dev(bio, block_ctx->dev->bdev);
 bio->bi_iter.bi_sector = dev_bytenr >> 9;
 bio->bi_opf = REQ_OP_READ;
@@ -1577,8 +1580,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
 return -1;
 }
 if (submit_bio_wait(bio)) {
-pr_info("btrfsic: read error at logical %llu dev %s!\n",
-block_ctx->start, block_ctx->dev->name);
+pr_info("btrfsic: read error at logical %llu dev %pg!\n",
+block_ctx->start, block_ctx->dev->bdev);
 bio_put(bio);
 return -1;
 }
@@ -1602,33 +1605,35 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
 list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
 const struct btrfsic_block_link *l;
 
-pr_info("%c-block @%llu (%s/%llu/%d)\n",
+pr_info("%c-block @%llu (%pg/%llu/%d)\n",
 btrfsic_get_block_type(state, b_all),
-b_all->logical_bytenr, b_all->dev_state->name,
+b_all->logical_bytenr, b_all->dev_state->bdev,
 b_all->dev_bytenr, b_all->mirror_num);
 
 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
-pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n",
+pr_info(
+" %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n",
 btrfsic_get_block_type(state, b_all),
-b_all->logical_bytenr, b_all->dev_state->name,
+b_all->logical_bytenr, b_all->dev_state->bdev,
 b_all->dev_bytenr, b_all->mirror_num,
 l->ref_cnt,
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num);
 }
 
 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
-pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
+pr_info(
+" %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
 btrfsic_get_block_type(state, b_all),
-b_all->logical_bytenr, b_all->dev_state->name,
+b_all->logical_bytenr, b_all->dev_state->bdev,
 b_all->dev_bytenr, b_all->mirror_num,
 l->ref_cnt,
 btrfsic_get_block_type(state, l->block_ref_from),
 l->block_ref_from->logical_bytenr,
-l->block_ref_from->dev_state->name,
+l->block_ref_from->dev_state->bdev,
 l->block_ref_from->dev_bytenr,
 l->block_ref_from->mirror_num);
 }
@@ -1743,16 +1748,18 @@ again:
 if (block->logical_bytenr != bytenr &&
 !(!block->is_metadata &&
 block->logical_bytenr == 0))
-pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
-bytenr, dev_state->name,
+pr_info(
+"written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n",
+bytenr, dev_state->bdev,
 dev_bytenr,
 block->mirror_num,
 btrfsic_get_block_type(state,
 block),
 block->logical_bytenr);
 else
-pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
-bytenr, dev_state->name,
+pr_info(
+"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
+bytenr, dev_state->bdev,
 dev_bytenr, block->mirror_num,
 btrfsic_get_block_type(state,
 block));
@@ -1767,8 +1774,9 @@ again:
 processed_len = state->datablock_size;
 bytenr = block->logical_bytenr;
 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
-bytenr, dev_state->name, dev_bytenr,
+pr_info(
+"written block @%llu (%pg/%llu/%d) found in hash table, %c\n",
+bytenr, dev_state->bdev, dev_bytenr,
 block->mirror_num,
 btrfsic_get_block_type(state, block));
 }
@@ -1778,9 +1786,10 @@ again:
 list_empty(&block->ref_to_list) ? ' ' : '!',
 list_empty(&block->ref_from_list) ? ' ' : '!');
 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
-pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
+pr_info(
+"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
 btrfsic_get_block_type(state, block), bytenr,
-dev_state->name, dev_bytenr, block->mirror_num,
+dev_state->bdev, dev_bytenr, block->mirror_num,
 block->generation,
 btrfs_disk_key_objectid(&block->disk_key),
 block->disk_key.type,
@@ -1792,9 +1801,10 @@ again:
 }
 
 if (!block->is_iodone && !block->never_written) {
-pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
+pr_info(
+"btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
 btrfsic_get_block_type(state, block), bytenr,
-dev_state->name, dev_bytenr, block->mirror_num,
+dev_state->bdev, dev_bytenr, block->mirror_num,
 block->generation,
 btrfs_stack_header_generation(
 (struct btrfs_header *)
@@ -1921,8 +1931,9 @@ again:
 if (!is_metadata) {
 processed_len = state->datablock_size;
 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-pr_info("Written block (%s/%llu/?) !found in hash table, D.\n",
-dev_state->name, dev_bytenr);
+pr_info(
+"written block (%pg/%llu/?) !found in hash table, D\n",
+dev_state->bdev, dev_bytenr);
 if (!state->include_extent_data) {
 /* ignore that written D block */
 goto continue_loop;
@@ -1939,8 +1950,9 @@ again:
 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
 dev_bytenr);
 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n",
-bytenr, dev_state->name, dev_bytenr);
+pr_info(
+"written block @%llu (%pg/%llu/?) !found in hash table, M\n",
+bytenr, dev_state->bdev, dev_bytenr);
 }
 
 block_ctx.dev = dev_state;
@@ -1995,9 +2007,9 @@ again:
 block->next_in_same_bio = NULL;
 }
 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-pr_info("New written %c-block @%llu (%s/%llu/%d)\n",
+pr_info("new written %c-block @%llu (%pg/%llu/%d)\n",
 is_metadata ? 'M' : 'D',
-block->logical_bytenr, block->dev_state->name,
+block->logical_bytenr, block->dev_state->bdev,
 block->dev_bytenr, block->mirror_num);
 list_add(&block->all_blocks_node, &state->all_blocks_list);
 btrfsic_block_hashtable_add(block, &state->block_hashtable);
@@ -2041,10 +2053,10 @@ static void btrfsic_bio_end_io(struct bio *bp)
 
 if ((dev_state->state->print_mask &
 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
-pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
+pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n",
 bp->bi_status,
 btrfsic_get_block_type(dev_state->state, block),
-block->logical_bytenr, dev_state->name,
+block->logical_bytenr, dev_state->bdev,
 block->dev_bytenr, block->mirror_num);
 next_block = block->next_in_same_bio;
 block->iodone_w_error = iodone_w_error;
@@ -2052,8 +2064,8 @@ static void btrfsic_bio_end_io(struct bio *bp)
 dev_state->last_flush_gen++;
 if ((dev_state->state->print_mask &
 BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
-pr_info("bio_end_io() new %s flush_gen=%llu\n",
-dev_state->name,
+pr_info("bio_end_io() new %pg flush_gen=%llu\n",
+dev_state->bdev,
 dev_state->last_flush_gen);
 }
 if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2078,17 +2090,19 @@ static int btrfsic_process_written_superblock(
 if (!(superblock->generation > state->max_superblock_generation ||
 0 == state->max_superblock_generation)) {
 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n",
+pr_info(
+"btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n",
 superblock->logical_bytenr,
-superblock->dev_state->name,
+superblock->dev_state->bdev,
 superblock->dev_bytenr, superblock->mirror_num,
 btrfs_super_generation(super_hdr),
 state->max_superblock_generation);
 } else {
 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
-pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n",
+pr_info(
+"btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n",
 superblock->logical_bytenr,
-superblock->dev_state->name,
+superblock->dev_state->bdev,
 superblock->dev_bytenr, superblock->mirror_num,
 btrfs_super_generation(super_hdr),
 state->max_superblock_generation);
@@ -2232,38 +2246,42 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
 */
 list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
-pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n",
+pr_info(
+"rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n",
 recursion_level,
 btrfsic_get_block_type(state, block),
-block->logical_bytenr, block->dev_state->name,
+block->logical_bytenr, block->dev_state->bdev,
 block->dev_bytenr, block->mirror_num,
 l->ref_cnt,
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num);
 if (l->block_ref_to->never_written) {
-pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n",
+pr_info(
+"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n",
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num);
 ret = -1;
 } else if (!l->block_ref_to->is_iodone) {
-pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n",
+pr_info(
+"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n",
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num);
 ret = -1;
 } else if (l->block_ref_to->iodone_w_error) {
-pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n",
+pr_info(
+"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n",
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num);
 ret = -1;
@@ -2273,10 +2291,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
 l->parent_generation &&
 BTRFSIC_GENERATION_UNKNOWN !=
 l->block_ref_to->generation) {
-pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n",
+pr_info(
+"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n",
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
-l->block_ref_to->dev_state->name,
+l->block_ref_to->dev_state->bdev,
 l->block_ref_to->dev_bytenr,
 l->block_ref_to->mirror_num,
 l->block_ref_to->generation,
@@ -2284,10 +2303,11 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
 ret = -1;
 } else if (l->block_ref_to->flush_gen >
 l->block_ref_to->dev_state->last_flush_gen) {
-pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
+pr_info(
+"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
 btrfsic_get_block_type(state, l->block_ref_to),
 l->block_ref_to->logical_bytenr,
|
||||||
l->block_ref_to->dev_state->name,
|
l->block_ref_to->dev_state->bdev,
|
||||||
l->block_ref_to->dev_bytenr,
|
l->block_ref_to->dev_bytenr,
|
||||||
l->block_ref_to->mirror_num, block->flush_gen,
|
l->block_ref_to->mirror_num, block->flush_gen,
|
||||||
l->block_ref_to->dev_state->last_flush_gen);
|
l->block_ref_to->dev_state->last_flush_gen);
|
||||||
@@ -2324,15 +2344,16 @@ static int btrfsic_is_block_ref_by_superblock(
|
|||||||
*/
|
*/
|
||||||
list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
|
list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
|
||||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||||
pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
|
pr_info(
|
||||||
|
"rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n",
|
||||||
recursion_level,
|
recursion_level,
|
||||||
btrfsic_get_block_type(state, block),
|
btrfsic_get_block_type(state, block),
|
||||||
block->logical_bytenr, block->dev_state->name,
|
block->logical_bytenr, block->dev_state->bdev,
|
||||||
block->dev_bytenr, block->mirror_num,
|
block->dev_bytenr, block->mirror_num,
|
||||||
l->ref_cnt,
|
l->ref_cnt,
|
||||||
btrfsic_get_block_type(state, l->block_ref_from),
|
btrfsic_get_block_type(state, l->block_ref_from),
|
||||||
l->block_ref_from->logical_bytenr,
|
l->block_ref_from->logical_bytenr,
|
||||||
l->block_ref_from->dev_state->name,
|
l->block_ref_from->dev_state->bdev,
|
||||||
l->block_ref_from->dev_bytenr,
|
l->block_ref_from->dev_bytenr,
|
||||||
l->block_ref_from->mirror_num);
|
l->block_ref_from->mirror_num);
|
||||||
if (l->block_ref_from->is_superblock &&
|
if (l->block_ref_from->is_superblock &&
|
||||||
@@ -2354,30 +2375,30 @@ static int btrfsic_is_block_ref_by_superblock(
|
|||||||
static void btrfsic_print_add_link(const struct btrfsic_state *state,
|
static void btrfsic_print_add_link(const struct btrfsic_state *state,
|
||||||
const struct btrfsic_block_link *l)
|
const struct btrfsic_block_link *l)
|
||||||
{
|
{
|
||||||
pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
|
pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
|
||||||
l->ref_cnt,
|
l->ref_cnt,
|
||||||
btrfsic_get_block_type(state, l->block_ref_from),
|
btrfsic_get_block_type(state, l->block_ref_from),
|
||||||
l->block_ref_from->logical_bytenr,
|
l->block_ref_from->logical_bytenr,
|
||||||
l->block_ref_from->dev_state->name,
|
l->block_ref_from->dev_state->bdev,
|
||||||
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
||||||
btrfsic_get_block_type(state, l->block_ref_to),
|
btrfsic_get_block_type(state, l->block_ref_to),
|
||||||
l->block_ref_to->logical_bytenr,
|
l->block_ref_to->logical_bytenr,
|
||||||
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
|
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
|
||||||
l->block_ref_to->mirror_num);
|
l->block_ref_to->mirror_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
|
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
|
||||||
const struct btrfsic_block_link *l)
|
const struct btrfsic_block_link *l)
|
||||||
{
|
{
|
||||||
pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
|
pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n",
|
||||||
l->ref_cnt,
|
l->ref_cnt,
|
||||||
btrfsic_get_block_type(state, l->block_ref_from),
|
btrfsic_get_block_type(state, l->block_ref_from),
|
||||||
l->block_ref_from->logical_bytenr,
|
l->block_ref_from->logical_bytenr,
|
||||||
l->block_ref_from->dev_state->name,
|
l->block_ref_from->dev_state->bdev,
|
||||||
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
|
||||||
btrfsic_get_block_type(state, l->block_ref_to),
|
btrfsic_get_block_type(state, l->block_ref_to),
|
||||||
l->block_ref_to->logical_bytenr,
|
l->block_ref_to->logical_bytenr,
|
||||||
l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
|
l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr,
|
||||||
l->block_ref_to->mirror_num);
|
l->block_ref_to->mirror_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2419,9 +2440,9 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
|
|||||||
* This algorithm is recursive because the amount of used stack space
|
* This algorithm is recursive because the amount of used stack space
|
||||||
* is very small and the max recursion depth is limited.
|
* is very small and the max recursion depth is limited.
|
||||||
*/
|
*/
|
||||||
indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
|
indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)",
|
||||||
btrfsic_get_block_type(state, block),
|
btrfsic_get_block_type(state, block),
|
||||||
block->logical_bytenr, block->dev_state->name,
|
block->logical_bytenr, block->dev_state->bdev,
|
||||||
block->dev_bytenr, block->mirror_num);
|
block->dev_bytenr, block->mirror_num);
|
||||||
if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
|
if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
|
||||||
printk("[...]\n");
|
printk("[...]\n");
|
||||||
@@ -2542,10 +2563,10 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
|
|||||||
block->never_written = never_written;
|
block->never_written = never_written;
|
||||||
block->mirror_num = mirror_num;
|
block->mirror_num = mirror_num;
|
||||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||||
pr_info("New %s%c-block @%llu (%s/%llu/%d)\n",
|
pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n",
|
||||||
additional_string,
|
additional_string,
|
||||||
btrfsic_get_block_type(state, block),
|
btrfsic_get_block_type(state, block),
|
||||||
block->logical_bytenr, dev_state->name,
|
block->logical_bytenr, dev_state->bdev,
|
||||||
block->dev_bytenr, mirror_num);
|
block->dev_bytenr, mirror_num);
|
||||||
list_add(&block->all_blocks_node, &state->all_blocks_list);
|
list_add(&block->all_blocks_node, &state->all_blocks_list);
|
||||||
btrfsic_block_hashtable_add(block, &state->block_hashtable);
|
btrfsic_block_hashtable_add(block, &state->block_hashtable);
|
||||||
@@ -2592,8 +2613,9 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (WARN_ON(!match)) {
|
if (WARN_ON(!match)) {
|
||||||
pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n",
|
pr_info(
|
||||||
bytenr, dev_state->name, dev_bytenr);
|
"btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n",
|
||||||
|
bytenr, dev_state->bdev, dev_bytenr);
|
||||||
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
|
for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
|
||||||
ret = btrfsic_map_block(state, bytenr,
|
ret = btrfsic_map_block(state, bytenr,
|
||||||
state->metablock_size,
|
state->metablock_size,
|
||||||
@@ -2601,8 +2623,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
|
|||||||
if (ret)
|
if (ret)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n",
|
pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n",
|
||||||
bytenr, block_ctx.dev->name,
|
bytenr, block_ctx.dev->bdev,
|
||||||
block_ctx.dev_bytenr, mirror_num);
|
block_ctx.dev_bytenr, mirror_num);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2675,8 +2697,9 @@ static void __btrfsic_submit_bio(struct bio *bio)
|
|||||||
if ((dev_state->state->print_mask &
|
if ((dev_state->state->print_mask &
|
||||||
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
||||||
BTRFSIC_PRINT_MASK_VERBOSE)))
|
BTRFSIC_PRINT_MASK_VERBOSE)))
|
||||||
pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n",
|
pr_info(
|
||||||
dev_state->name);
|
"btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n",
|
||||||
|
dev_state->bdev);
|
||||||
} else {
|
} else {
|
||||||
struct btrfsic_block *const block =
|
struct btrfsic_block *const block =
|
||||||
&dev_state->dummy_block_for_bio_bh_flush;
|
&dev_state->dummy_block_for_bio_bh_flush;
|
||||||
@@ -2751,7 +2774,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
|||||||
|
|
||||||
list_for_each_entry(device, dev_head, dev_list) {
|
list_for_each_entry(device, dev_head, dev_list) {
|
||||||
struct btrfsic_dev_state *ds;
|
struct btrfsic_dev_state *ds;
|
||||||
const char *p;
|
|
||||||
|
|
||||||
if (!device->bdev || !device->name)
|
if (!device->bdev || !device->name)
|
||||||
continue;
|
continue;
|
||||||
@@ -2763,10 +2785,6 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
|
|||||||
}
|
}
|
||||||
ds->bdev = device->bdev;
|
ds->bdev = device->bdev;
|
||||||
ds->state = state;
|
ds->state = state;
|
||||||
bdevname(ds->bdev, ds->name);
|
|
||||||
ds->name[BDEVNAME_SIZE - 1] = '\0';
|
|
||||||
p = kbasename(ds->name);
|
|
||||||
strlcpy(ds->name, p, sizeof(ds->name));
|
|
||||||
btrfsic_dev_state_hashtable_add(ds,
|
btrfsic_dev_state_hashtable_add(ds,
|
||||||
&btrfsic_dev_state_hashtable);
|
&btrfsic_dev_state_hashtable);
|
||||||
}
|
}
|
||||||
@@ -2844,9 +2862,10 @@ void btrfsic_unmount(struct btrfs_fs_devices *fs_devices)
|
|||||||
if (b_all->is_iodone || b_all->never_written)
|
if (b_all->is_iodone || b_all->never_written)
|
||||||
btrfsic_block_free(b_all);
|
btrfsic_block_free(b_all);
|
||||||
else
|
else
|
||||||
pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n",
|
pr_info(
|
||||||
|
"btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n",
|
||||||
btrfsic_get_block_type(state, b_all),
|
btrfsic_get_block_type(state, b_all),
|
||||||
b_all->logical_bytenr, b_all->dev_state->name,
|
b_all->logical_bytenr, b_all->dev_state->bdev,
|
||||||
b_all->dev_bytenr, b_all->mirror_num);
|
b_all->dev_bytenr, b_all->mirror_num);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
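The check-integrity changes above drop the cached device name (dev_state->name, previously filled via bdevname()) and print the struct block_device directly with the %pg printk format, so no name string has to be stored or truncated. A minimal sketch of that pattern, assuming a kernel context; example_report_block() and its arguments are hypothetical:

#include <linux/blkdev.h>
#include <linux/printk.h>

/* Sketch: report a block location without caching the device name. */
static void example_report_block(struct block_device *bdev, u64 bytenr, int mirror)
{
	/* %pg prints the block device name (e.g. "sda1") straight from the bdev */
	pr_info("example: block @%llu (%pg/%d)\n", bytenr, bdev, mirror);
}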
@@ -29,6 +29,7 @@
|
|||||||
#include "compression.h"
|
#include "compression.h"
|
||||||
#include "extent_io.h"
|
#include "extent_io.h"
|
||||||
#include "extent_map.h"
|
#include "extent_map.h"
|
||||||
|
#include "subpage.h"
|
||||||
#include "zoned.h"
|
#include "zoned.h"
|
||||||
|
|
||||||
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
|
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
|
||||||
@@ -181,9 +182,9 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
|
|||||||
if (memcmp(&csum, cb_sum, csum_size) != 0) {
|
if (memcmp(&csum, cb_sum, csum_size) != 0) {
|
||||||
btrfs_print_data_csum_error(inode, disk_start,
|
btrfs_print_data_csum_error(inode, disk_start,
|
||||||
csum, cb_sum, cb->mirror_num);
|
csum, cb_sum, cb->mirror_num);
|
||||||
if (btrfs_io_bio(bio)->device)
|
if (btrfs_bio(bio)->device)
|
||||||
btrfs_dev_stat_inc_and_print(
|
btrfs_dev_stat_inc_and_print(
|
||||||
btrfs_io_bio(bio)->device,
|
btrfs_bio(bio)->device,
|
||||||
BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
@@ -194,6 +195,87 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reduce bio and io accounting for a compressed_bio with its corresponding bio.
|
||||||
|
*
|
||||||
|
* Return true if there is no pending bio nor io.
|
||||||
|
* Return false otherwise.
|
||||||
|
*/
|
||||||
|
static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||||
|
unsigned int bi_size = 0;
|
||||||
|
bool last_io = false;
|
||||||
|
struct bio_vec *bvec;
|
||||||
|
struct bvec_iter_all iter_all;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* At endio time, bi_iter.bi_size doesn't represent the real bio size.
|
||||||
|
* Thus here we have to iterate through all segments to grab correct
|
||||||
|
* bio size.
|
||||||
|
*/
|
||||||
|
bio_for_each_segment_all(bvec, bio, iter_all)
|
||||||
|
bi_size += bvec->bv_len;
|
||||||
|
|
||||||
|
if (bio->bi_status)
|
||||||
|
cb->errors = 1;
|
||||||
|
|
||||||
|
ASSERT(bi_size && bi_size <= cb->compressed_len);
|
||||||
|
last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
|
||||||
|
&cb->pending_sectors);
|
||||||
|
/*
|
||||||
|
* Here we must wake up the possible error handler after all other
|
||||||
|
* operations on @cb finished, or we can race with
|
||||||
|
* finish_compressed_bio_*() which may free @cb.
|
||||||
|
*/
|
||||||
|
wake_up_var(cb);
|
||||||
|
|
||||||
|
return last_io;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
|
||||||
|
{
|
||||||
|
unsigned int index;
|
||||||
|
struct page *page;
|
||||||
|
|
||||||
|
/* Release the compressed pages */
|
||||||
|
for (index = 0; index < cb->nr_pages; index++) {
|
||||||
|
page = cb->compressed_pages[index];
|
||||||
|
page->mapping = NULL;
|
||||||
|
put_page(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do io completion on the original bio */
|
||||||
|
if (cb->errors) {
|
||||||
|
bio_io_error(cb->orig_bio);
|
||||||
|
} else {
|
||||||
|
struct bio_vec *bvec;
|
||||||
|
struct bvec_iter_all iter_all;
|
||||||
|
|
||||||
|
ASSERT(bio);
|
||||||
|
ASSERT(!bio->bi_status);
|
||||||
|
/*
|
||||||
|
* We have verified the checksum already, set page checked so
|
||||||
|
* the end_io handlers know about it
|
||||||
|
*/
|
||||||
|
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
||||||
|
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
|
||||||
|
u64 bvec_start = page_offset(bvec->bv_page) +
|
||||||
|
bvec->bv_offset;
|
||||||
|
|
||||||
|
btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
|
||||||
|
bvec->bv_page, bvec_start,
|
||||||
|
bvec->bv_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
bio_endio(cb->orig_bio);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally free the cb struct */
|
||||||
|
kfree(cb->compressed_pages);
|
||||||
|
kfree(cb);
|
||||||
|
}
|
||||||
|
|
||||||
/* when we finish reading compressed pages from the disk, we
|
/* when we finish reading compressed pages from the disk, we
|
||||||
* decompress them and then run the bio end_io routines on the
|
* decompress them and then run the bio end_io routines on the
|
||||||
* decompressed pages (in the inode address space).
|
* decompressed pages (in the inode address space).
|
||||||
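The hunk above replaces per-bio accounting with per-sector accounting: dec_and_test_compressed_bio() subtracts the completing bio's sector count from cb->pending_sectors with refcount_sub_and_test() and then wakes any waiter. A minimal sketch of that sub-and-test idiom, assuming a kernel context; the example_* names are hypothetical:

#include <linux/refcount.h>
#include <linux/wait_bit.h>

struct example_io_ctx {
	refcount_t pending_sectors;	/* one count per sector still in flight */
};

/* Return true when this completion dropped the last outstanding sector. */
static bool example_sub_and_test(struct example_io_ctx *ctx, unsigned int nr_sectors)
{
	bool last = refcount_sub_and_test(nr_sectors, &ctx->pending_sectors);

	/* Wake a possible waiter (e.g. an error path) after the accounting update */
	wake_up_var(ctx);
	return last;
}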
@@ -208,25 +290,17 @@ static void end_compressed_bio_read(struct bio *bio)
|
|||||||
{
|
{
|
||||||
struct compressed_bio *cb = bio->bi_private;
|
struct compressed_bio *cb = bio->bi_private;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct page *page;
|
unsigned int mirror = btrfs_bio(bio)->mirror_num;
|
||||||
unsigned int index;
|
|
||||||
unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (bio->bi_status)
|
if (!dec_and_test_compressed_bio(cb, bio))
|
||||||
cb->errors = 1;
|
|
||||||
|
|
||||||
/* if there are more bios still pending for this compressed
|
|
||||||
* extent, just exit
|
|
||||||
*/
|
|
||||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Record the correct mirror_num in cb->orig_bio so that
|
* Record the correct mirror_num in cb->orig_bio so that
|
||||||
* read-repair can work properly.
|
* read-repair can work properly.
|
||||||
*/
|
*/
|
||||||
btrfs_io_bio(cb->orig_bio)->mirror_num = mirror;
|
btrfs_bio(cb->orig_bio)->mirror_num = mirror;
|
||||||
cb->mirror_num = mirror;
|
cb->mirror_num = mirror;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -250,36 +324,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
|||||||
csum_failed:
|
csum_failed:
|
||||||
if (ret)
|
if (ret)
|
||||||
cb->errors = 1;
|
cb->errors = 1;
|
||||||
|
finish_compressed_bio_read(cb, bio);
|
||||||
/* release the compressed pages */
|
|
||||||
index = 0;
|
|
||||||
for (index = 0; index < cb->nr_pages; index++) {
|
|
||||||
page = cb->compressed_pages[index];
|
|
||||||
page->mapping = NULL;
|
|
||||||
put_page(page);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* do io completion on the original bio */
|
|
||||||
if (cb->errors) {
|
|
||||||
bio_io_error(cb->orig_bio);
|
|
||||||
} else {
|
|
||||||
struct bio_vec *bvec;
|
|
||||||
struct bvec_iter_all iter_all;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* we have verified the checksum already, set page
|
|
||||||
* checked so the end_io handlers know about it
|
|
||||||
*/
|
|
||||||
ASSERT(!bio_flagged(bio, BIO_CLONED));
|
|
||||||
bio_for_each_segment_all(bvec, cb->orig_bio, iter_all)
|
|
||||||
SetPageChecked(bvec->bv_page);
|
|
||||||
|
|
||||||
bio_endio(cb->orig_bio);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* finally free the cb struct */
|
|
||||||
kfree(cb->compressed_pages);
|
|
||||||
kfree(cb);
|
|
||||||
out:
|
out:
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
}
|
}
|
||||||
@@ -291,6 +336,7 @@ out:
|
|||||||
static noinline void end_compressed_writeback(struct inode *inode,
|
static noinline void end_compressed_writeback(struct inode *inode,
|
||||||
const struct compressed_bio *cb)
|
const struct compressed_bio *cb)
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
unsigned long index = cb->start >> PAGE_SHIFT;
|
unsigned long index = cb->start >> PAGE_SHIFT;
|
||||||
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
|
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
|
||||||
struct page *pages[16];
|
struct page *pages[16];
|
||||||
@@ -313,7 +359,8 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
|||||||
for (i = 0; i < ret; i++) {
|
for (i = 0; i < ret; i++) {
|
||||||
if (cb->errors)
|
if (cb->errors)
|
||||||
SetPageError(pages[i]);
|
SetPageError(pages[i]);
|
||||||
end_page_writeback(pages[i]);
|
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
|
||||||
|
cb->start, cb->len);
|
||||||
put_page(pages[i]);
|
put_page(pages[i]);
|
||||||
}
|
}
|
||||||
nr_pages -= ret;
|
nr_pages -= ret;
|
||||||
@@ -322,60 +369,127 @@ static noinline void end_compressed_writeback(struct inode *inode,
|
|||||||
/* the inode may be gone now */
|
/* the inode may be gone now */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
static void finish_compressed_bio_write(struct compressed_bio *cb)
|
||||||
* do the cleanup once all the compressed pages hit the disk.
|
|
||||||
* This will clear writeback on the file pages and free the compressed
|
|
||||||
* pages.
|
|
||||||
*
|
|
||||||
* This also calls the writeback end hooks for the file pages so that
|
|
||||||
* metadata and checksums can be updated in the file.
|
|
||||||
*/
|
|
||||||
static void end_compressed_bio_write(struct bio *bio)
|
|
||||||
{
|
{
|
||||||
struct compressed_bio *cb = bio->bi_private;
|
struct inode *inode = cb->inode;
|
||||||
struct inode *inode;
|
|
||||||
struct page *page;
|
|
||||||
unsigned int index;
|
unsigned int index;
|
||||||
|
|
||||||
if (bio->bi_status)
|
/*
|
||||||
cb->errors = 1;
|
* Ok, we're the last bio for this extent, step one is to call back
|
||||||
|
* into the FS and do all the end_io operations.
|
||||||
/* if there are more bios still pending for this compressed
|
|
||||||
* extent, just exit
|
|
||||||
*/
|
*/
|
||||||
if (!refcount_dec_and_test(&cb->pending_bios))
|
|
||||||
goto out;
|
|
||||||
|
|
||||||
/* ok, we're the last bio for this extent, step one is to
|
|
||||||
* call back into the FS and do all the end_io operations
|
|
||||||
*/
|
|
||||||
inode = cb->inode;
|
|
||||||
btrfs_record_physical_zoned(inode, cb->start, bio);
|
|
||||||
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
|
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
|
||||||
cb->start, cb->start + cb->len - 1,
|
cb->start, cb->start + cb->len - 1,
|
||||||
!cb->errors);
|
!cb->errors);
|
||||||
|
|
||||||
end_compressed_writeback(inode, cb);
|
end_compressed_writeback(inode, cb);
|
||||||
/* note, our inode could be gone now */
|
/* Note, our inode could be gone now */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* release the compressed pages, these came from alloc_page and
|
* Release the compressed pages, these came from alloc_page and
|
||||||
* are not attached to the inode at all
|
* are not attached to the inode at all
|
||||||
*/
|
*/
|
||||||
index = 0;
|
|
||||||
for (index = 0; index < cb->nr_pages; index++) {
|
for (index = 0; index < cb->nr_pages; index++) {
|
||||||
page = cb->compressed_pages[index];
|
struct page *page = cb->compressed_pages[index];
|
||||||
|
|
||||||
page->mapping = NULL;
|
page->mapping = NULL;
|
||||||
put_page(page);
|
put_page(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* finally free the cb struct */
|
/* Finally free the cb struct */
|
||||||
kfree(cb->compressed_pages);
|
kfree(cb->compressed_pages);
|
||||||
kfree(cb);
|
kfree(cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do the cleanup once all the compressed pages hit the disk. This will clear
|
||||||
|
* writeback on the file pages and free the compressed pages.
|
||||||
|
*
|
||||||
|
* This also calls the writeback end hooks for the file pages so that metadata
|
||||||
|
* and checksums can be updated in the file.
|
||||||
|
*/
|
||||||
|
static void end_compressed_bio_write(struct bio *bio)
|
||||||
|
{
|
||||||
|
struct compressed_bio *cb = bio->bi_private;
|
||||||
|
|
||||||
|
if (!dec_and_test_compressed_bio(cb, bio))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||||
|
|
||||||
|
finish_compressed_bio_write(cb);
|
||||||
out:
|
out:
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
|
||||||
|
struct compressed_bio *cb,
|
||||||
|
struct bio *bio, int mirror_num)
|
||||||
|
{
|
||||||
|
blk_status_t ret;
|
||||||
|
|
||||||
|
ASSERT(bio->bi_iter.bi_size);
|
||||||
|
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a compressed_bio, which will be used to read/write on-disk
|
||||||
|
* (aka, compressed) data.
|
||||||
|
*
|
||||||
|
* @cb: The compressed_bio structure, which records all the needed
|
||||||
|
* information to bind the compressed data to the uncompressed
|
||||||
|
* page cache.
|
||||||
|
* @disk_bytenr: The logical bytenr where the compressed data will be read
|
||||||
|
* from or written to.
|
||||||
|
* @endio_func: The endio function to call after the IO for compressed data
|
||||||
|
* is finished.
|
||||||
|
* @next_stripe_start: Return value of the logical bytenr where the next stripe starts.
|
||||||
|
* Let the caller know to only fill the bio up to the stripe
|
||||||
|
* boundary.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
|
||||||
|
unsigned int opf, bio_end_io_t endio_func,
|
||||||
|
u64 *next_stripe_start)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||||
|
struct btrfs_io_geometry geom;
|
||||||
|
struct extent_map *em;
|
||||||
|
struct bio *bio;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||||
|
|
||||||
|
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||||
|
bio->bi_opf = opf;
|
||||||
|
bio->bi_private = cb;
|
||||||
|
bio->bi_end_io = endio_func;
|
||||||
|
|
||||||
|
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
|
||||||
|
if (IS_ERR(em)) {
|
||||||
|
bio_put(bio);
|
||||||
|
return ERR_CAST(em);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
|
||||||
|
bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
|
||||||
|
|
||||||
|
ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
|
||||||
|
free_extent_map(em);
|
||||||
|
if (ret < 0) {
|
||||||
|
bio_put(bio);
|
||||||
|
return ERR_PTR(ret);
|
||||||
|
}
|
||||||
|
*next_stripe_start = disk_bytenr + geom.len;
|
||||||
|
|
||||||
|
return bio;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* worker function to build and submit bios for previously compressed pages.
|
* worker function to build and submit bios for previously compressed pages.
|
||||||
* The corresponding pages in the inode should be marked for writeback
|
* The corresponding pages in the inode should be marked for writeback
|
||||||
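alloc_compressed_bio() above reports, through *next_stripe_start, where the stripe containing disk_bytenr ends, so the submit loops know how far one bio may grow before it must be sent. A runnable, userspace-only sketch of that boundary arithmetic with made-up numbers (the real code gets the stripe length from btrfs_get_io_geometry()):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical geometry: bytes left in the stripe that contains disk_bytenr. */
static uint64_t stripe_len_left(uint64_t disk_bytenr, uint64_t stripe_size)
{
	return stripe_size - (disk_bytenr % stripe_size);
}

int main(void)
{
	uint64_t stripe_size = 65536;	/* 64K stripes, for illustration only */
	uint64_t disk_bytenr = 200704;	/* somewhere inside the fourth stripe */
	uint64_t next_stripe_start = disk_bytenr + stripe_len_left(disk_bytenr, stripe_size);

	/* A bio that starts at disk_bytenr must not cross next_stripe_start. */
	printf("next stripe starts at %llu\n", (unsigned long long)next_stripe_start);
	return 0;
}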
@@ -396,20 +510,19 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
|||||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||||
struct bio *bio = NULL;
|
struct bio *bio = NULL;
|
||||||
struct compressed_bio *cb;
|
struct compressed_bio *cb;
|
||||||
unsigned long bytes_left;
|
u64 cur_disk_bytenr = disk_start;
|
||||||
int pg_index = 0;
|
u64 next_stripe_start;
|
||||||
struct page *page;
|
|
||||||
u64 first_byte = disk_start;
|
|
||||||
blk_status_t ret;
|
blk_status_t ret;
|
||||||
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
|
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
|
||||||
const bool use_append = btrfs_use_zone_append(inode, disk_start);
|
const bool use_append = btrfs_use_zone_append(inode, disk_start);
|
||||||
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
|
const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
|
||||||
|
|
||||||
WARN_ON(!PAGE_ALIGNED(start));
|
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
|
||||||
|
IS_ALIGNED(len, fs_info->sectorsize));
|
||||||
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
|
||||||
if (!cb)
|
if (!cb)
|
||||||
return BLK_STS_RESOURCE;
|
return BLK_STS_RESOURCE;
|
||||||
refcount_set(&cb->pending_bios, 0);
|
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||||
cb->errors = 0;
|
cb->errors = 0;
|
||||||
cb->inode = &inode->vfs_inode;
|
cb->inode = &inode->vfs_inode;
|
||||||
cb->start = start;
|
cb->start = start;
|
||||||
@@ -420,118 +533,100 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
|||||||
cb->orig_bio = NULL;
|
cb->orig_bio = NULL;
|
||||||
cb->nr_pages = nr_pages;
|
cb->nr_pages = nr_pages;
|
||||||
|
|
||||||
bio = btrfs_bio_alloc(first_byte);
|
while (cur_disk_bytenr < disk_start + compressed_len) {
|
||||||
bio->bi_opf = bio_op | write_flags;
|
u64 offset = cur_disk_bytenr - disk_start;
|
||||||
bio->bi_private = cb;
|
unsigned int index = offset >> PAGE_SHIFT;
|
||||||
bio->bi_end_io = end_compressed_bio_write;
|
unsigned int real_size;
|
||||||
|
unsigned int added;
|
||||||
|
struct page *page = compressed_pages[index];
|
||||||
|
bool submit = false;
|
||||||
|
|
||||||
if (use_append) {
|
/* Allocate new bio if submitted or not yet allocated */
|
||||||
struct btrfs_device *device;
|
if (!bio) {
|
||||||
|
bio = alloc_compressed_bio(cb, cur_disk_bytenr,
|
||||||
device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
|
bio_op | write_flags, end_compressed_bio_write,
|
||||||
if (IS_ERR(device)) {
|
&next_stripe_start);
|
||||||
kfree(cb);
|
if (IS_ERR(bio)) {
|
||||||
bio_put(bio);
|
ret = errno_to_blk_status(PTR_ERR(bio));
|
||||||
return BLK_STS_NOTSUPP;
|
bio = NULL;
|
||||||
|
goto finish_cb;
|
||||||
}
|
}
|
||||||
|
|
||||||
bio_set_dev(bio, device->bdev);
|
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
if (blkcg_css) {
|
* We should never reach next_stripe_start as we will
|
||||||
bio->bi_opf |= REQ_CGROUP_PUNT;
|
* submit comp_bio when we reach the boundary.
|
||||||
kthread_associate_blkcg(blkcg_css);
|
*/
|
||||||
}
|
ASSERT(cur_disk_bytenr != next_stripe_start);
|
||||||
refcount_set(&cb->pending_bios, 1);
|
|
||||||
|
|
||||||
/* create and submit bios for the compressed pages */
|
|
||||||
bytes_left = compressed_len;
|
|
||||||
for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
|
|
||||||
int submit = 0;
|
|
||||||
int len = 0;
|
|
||||||
|
|
||||||
page = compressed_pages[pg_index];
|
|
||||||
page->mapping = inode->vfs_inode.i_mapping;
|
|
||||||
if (bio->bi_iter.bi_size)
|
|
||||||
submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
|
|
||||||
0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Page can only be added to bio if the current bio fits in
|
* We have various limits on the real write size:
|
||||||
* stripe.
|
* - stripe boundary
|
||||||
|
* - page boundary
|
||||||
|
* - compressed length boundary
|
||||||
*/
|
*/
|
||||||
if (!submit) {
|
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
|
||||||
if (pg_index == 0 && use_append)
|
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
|
||||||
len = bio_add_zone_append_page(bio, page,
|
real_size = min_t(u64, real_size, compressed_len - offset);
|
||||||
PAGE_SIZE, 0);
|
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
|
||||||
|
|
||||||
|
if (use_append)
|
||||||
|
added = bio_add_zone_append_page(bio, page, real_size,
|
||||||
|
offset_in_page(offset));
|
||||||
else
|
else
|
||||||
len = bio_add_page(bio, page, PAGE_SIZE, 0);
|
added = bio_add_page(bio, page, real_size,
|
||||||
}
|
offset_in_page(offset));
|
||||||
|
/* Reached zoned boundary */
|
||||||
|
if (added == 0)
|
||||||
|
submit = true;
|
||||||
|
|
||||||
page->mapping = NULL;
|
cur_disk_bytenr += added;
|
||||||
if (submit || len < PAGE_SIZE) {
|
/* Reached stripe boundary */
|
||||||
/*
|
if (cur_disk_bytenr == next_stripe_start)
|
||||||
* inc the count before we submit the bio so
|
submit = true;
|
||||||
* we know the end IO handler won't happen before
|
|
||||||
* we inc the count. Otherwise, the cb might get
|
|
||||||
* freed before we're done setting it up
|
|
||||||
*/
|
|
||||||
refcount_inc(&cb->pending_bios);
|
|
||||||
ret = btrfs_bio_wq_end_io(fs_info, bio,
|
|
||||||
BTRFS_WQ_ENDIO_DATA);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
|
|
||||||
|
/* Finished the range */
|
||||||
|
if (cur_disk_bytenr == disk_start + compressed_len)
|
||||||
|
submit = true;
|
||||||
|
|
||||||
|
if (submit) {
|
||||||
if (!skip_sum) {
|
if (!skip_sum) {
|
||||||
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
if (ret)
|
||||||
|
goto finish_cb;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = btrfs_map_bio(fs_info, bio, 0);
|
ret = submit_compressed_bio(fs_info, cb, bio, 0);
|
||||||
if (ret) {
|
if (ret)
|
||||||
bio->bi_status = ret;
|
goto finish_cb;
|
||||||
bio_endio(bio);
|
bio = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
bio = btrfs_bio_alloc(first_byte);
|
|
||||||
bio->bi_opf = bio_op | write_flags;
|
|
||||||
bio->bi_private = cb;
|
|
||||||
bio->bi_end_io = end_compressed_bio_write;
|
|
||||||
if (blkcg_css)
|
|
||||||
bio->bi_opf |= REQ_CGROUP_PUNT;
|
|
||||||
/*
|
|
||||||
* Use bio_add_page() to ensure the bio has at least one
|
|
||||||
* page.
|
|
||||||
*/
|
|
||||||
bio_add_page(bio, page, PAGE_SIZE, 0);
|
|
||||||
}
|
|
||||||
if (bytes_left < PAGE_SIZE) {
|
|
||||||
btrfs_info(fs_info,
|
|
||||||
"bytes left %lu compress len %u nr %u",
|
|
||||||
bytes_left, cb->compressed_len, cb->nr_pages);
|
|
||||||
}
|
|
||||||
bytes_left -= PAGE_SIZE;
|
|
||||||
first_byte += PAGE_SIZE;
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
|
|
||||||
if (!skip_sum) {
|
|
||||||
ret = btrfs_csum_one_bio(inode, bio, start, 1);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = btrfs_map_bio(fs_info, bio, 0);
|
|
||||||
if (ret) {
|
|
||||||
bio->bi_status = ret;
|
|
||||||
bio_endio(bio);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (blkcg_css)
|
if (blkcg_css)
|
||||||
kthread_associate_blkcg(NULL);
|
kthread_associate_blkcg(NULL);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
finish_cb:
|
||||||
|
if (bio) {
|
||||||
|
bio->bi_status = ret;
|
||||||
|
bio_endio(bio);
|
||||||
|
}
|
||||||
|
/* Last byte of @cb is submitted, endio will free @cb */
|
||||||
|
if (cur_disk_bytenr == disk_start + compressed_len)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||||
|
(disk_start + compressed_len - cur_disk_bytenr) >>
|
||||||
|
fs_info->sectorsize_bits);
|
||||||
|
/*
|
||||||
|
* Even with previous bio ended, we should still have io not yet
|
||||||
|
* submitted, thus need to finish manually.
|
||||||
|
*/
|
||||||
|
ASSERT(refcount_read(&cb->pending_sectors));
|
||||||
|
/* Now we are the only one referring @cb, can finish it safely. */
|
||||||
|
finish_compressed_bio_write(cb);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 bio_end_offset(struct bio *bio)
|
static u64 bio_end_offset(struct bio *bio)
|
||||||
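The rewritten write loop above sizes each bio_add_page() call by clamping against three limits at once: the stripe boundary, the page boundary and the end of the compressed data, then advances cur_disk_bytenr by whatever was actually added. A runnable, userspace-only illustration of that clamping with made-up values:

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE 4096ULL

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t disk_start = 1048576, compressed_len = 24576;	/* made-up extent */
	uint64_t next_stripe_start = 1069056;
	uint64_t cur = disk_start + 4096;
	uint64_t offset = cur - disk_start;

	/* Same three limits as the btrfs loop: stripe, page, compressed length */
	uint64_t real_size = next_stripe_start - cur;
	real_size = min_u64(real_size, EX_PAGE_SIZE - (offset % EX_PAGE_SIZE));
	real_size = min_u64(real_size, compressed_len - offset);

	printf("add %llu bytes this iteration\n", (unsigned long long)real_size);
	return 0;
}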
@@ -541,25 +636,33 @@ static u64 bio_end_offset(struct bio *bio)
|
|||||||
return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
|
return page_offset(last->bv_page) + last->bv_len + last->bv_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Add extra pages in the same compressed file extent so that we don't need to
|
||||||
|
* re-read the same extent again and again.
|
||||||
|
*
|
||||||
|
* NOTE: this won't work well for subpage, as for subpage read, we lock the
|
||||||
|
* full page then submit bio for each compressed/regular extents.
|
||||||
|
*
|
||||||
|
* This means, if we have several sectors in the same page pointing to the same
|
||||||
|
* on-disk compressed data, we will re-read the same extent many times and
|
||||||
|
* this function can only help for the next page.
|
||||||
|
*/
|
||||||
static noinline int add_ra_bio_pages(struct inode *inode,
|
static noinline int add_ra_bio_pages(struct inode *inode,
|
||||||
u64 compressed_end,
|
u64 compressed_end,
|
||||||
struct compressed_bio *cb)
|
struct compressed_bio *cb)
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
unsigned long end_index;
|
unsigned long end_index;
|
||||||
unsigned long pg_index;
|
u64 cur = bio_end_offset(cb->orig_bio);
|
||||||
u64 last_offset;
|
|
||||||
u64 isize = i_size_read(inode);
|
u64 isize = i_size_read(inode);
|
||||||
int ret;
|
int ret;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long nr_pages = 0;
|
|
||||||
struct extent_map *em;
|
struct extent_map *em;
|
||||||
struct address_space *mapping = inode->i_mapping;
|
struct address_space *mapping = inode->i_mapping;
|
||||||
struct extent_map_tree *em_tree;
|
struct extent_map_tree *em_tree;
|
||||||
struct extent_io_tree *tree;
|
struct extent_io_tree *tree;
|
||||||
u64 end;
|
int sectors_missed = 0;
|
||||||
int misses = 0;
|
|
||||||
|
|
||||||
last_offset = bio_end_offset(cb->orig_bio);
|
|
||||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||||
tree = &BTRFS_I(inode)->io_tree;
|
tree = &BTRFS_I(inode)->io_tree;
|
||||||
|
|
||||||
@@ -578,18 +681,29 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
|||||||
|
|
||||||
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
|
end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
|
||||||
|
|
||||||
while (last_offset < compressed_end) {
|
while (cur < compressed_end) {
|
||||||
pg_index = last_offset >> PAGE_SHIFT;
|
u64 page_end;
|
||||||
|
u64 pg_index = cur >> PAGE_SHIFT;
|
||||||
|
u32 add_size;
|
||||||
|
|
||||||
if (pg_index > end_index)
|
if (pg_index > end_index)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
page = xa_load(&mapping->i_pages, pg_index);
|
page = xa_load(&mapping->i_pages, pg_index);
|
||||||
if (page && !xa_is_value(page)) {
|
if (page && !xa_is_value(page)) {
|
||||||
misses++;
|
sectors_missed += (PAGE_SIZE - offset_in_page(cur)) >>
|
||||||
if (misses > 4)
|
fs_info->sectorsize_bits;
|
||||||
|
|
||||||
|
/* Beyond threshold, no need to continue */
|
||||||
|
if (sectors_missed > 4)
|
||||||
break;
|
break;
|
||||||
goto next;
|
|
||||||
|
/*
|
||||||
|
* Jump to next page start as we already have page for
|
||||||
|
* current offset.
|
||||||
|
*/
|
||||||
|
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
|
page = __page_cache_alloc(mapping_gfp_constraint(mapping,
|
||||||
@@ -599,14 +713,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
|||||||
|
|
||||||
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
|
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
|
||||||
put_page(page);
|
put_page(page);
|
||||||
goto next;
|
/* There is already a page, skip to page end */
|
||||||
|
cur = (pg_index << PAGE_SHIFT) + PAGE_SIZE;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* at this point, we have a locked page in the page cache
|
|
||||||
* for these bytes in the file. But, we have to make
|
|
||||||
* sure they map to this compressed extent on disk.
|
|
||||||
*/
|
|
||||||
ret = set_page_extent_mapped(page);
|
ret = set_page_extent_mapped(page);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
@@ -614,18 +725,22 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
end = last_offset + PAGE_SIZE - 1;
|
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
|
||||||
lock_extent(tree, last_offset, end);
|
lock_extent(tree, cur, page_end);
|
||||||
read_lock(&em_tree->lock);
|
read_lock(&em_tree->lock);
|
||||||
em = lookup_extent_mapping(em_tree, last_offset,
|
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
|
||||||
PAGE_SIZE);
|
|
||||||
read_unlock(&em_tree->lock);
|
read_unlock(&em_tree->lock);
|
||||||
|
|
||||||
if (!em || last_offset < em->start ||
|
/*
|
||||||
(last_offset + PAGE_SIZE > extent_map_end(em)) ||
|
* At this point, we have a locked page in the page cache for
|
||||||
|
* these bytes in the file. But, we have to make sure they map
|
||||||
|
* to this compressed extent on disk.
|
||||||
|
*/
|
||||||
|
if (!em || cur < em->start ||
|
||||||
|
(cur + fs_info->sectorsize > extent_map_end(em)) ||
|
||||||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
|
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
|
||||||
free_extent_map(em);
|
free_extent_map(em);
|
||||||
unlock_extent(tree, last_offset, end);
|
unlock_extent(tree, cur, page_end);
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
break;
|
break;
|
||||||
@@ -643,20 +758,23 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = bio_add_page(cb->orig_bio, page,
|
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||||
PAGE_SIZE, 0);
|
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
|
||||||
|
if (ret != add_size) {
|
||||||
if (ret == PAGE_SIZE) {
|
unlock_extent(tree, cur, page_end);
|
||||||
nr_pages++;
|
|
||||||
put_page(page);
|
|
||||||
} else {
|
|
||||||
unlock_extent(tree, last_offset, end);
|
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
put_page(page);
|
put_page(page);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
next:
|
/*
|
||||||
last_offset += PAGE_SIZE;
|
* If it's subpage, we also need to increase its
|
||||||
|
* subpage::readers number, as at endio we will decrease
|
||||||
|
* subpage::readers and to unlock the page.
|
||||||
|
*/
|
||||||
|
if (fs_info->sectorsize < PAGE_SIZE)
|
||||||
|
btrfs_subpage_start_reader(fs_info, page, cur, add_size);
|
||||||
|
put_page(page);
|
||||||
|
cur += add_size;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
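add_ra_bio_pages() now tracks a byte cursor (cur) instead of a page index: when the page is already cached it jumps the cursor to the next page start, and when it does add readahead data it bounds the length by both the extent end and the page end, which is what makes the helper usable with sectorsize smaller than PAGE_SIZE. A runnable, userspace-only sketch of those two steps, using a hypothetical 64K page / 4K sector layout:

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE 65536ULL	/* 64K page, as on a subpage (4K sectorsize) setup */

static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t cur = 73728;		/* current file offset, made up */
	uint64_t em_end = 98304;	/* end of the extent map, made up */
	uint64_t pg_index = cur / EX_PAGE_SIZE;
	uint64_t page_end = (pg_index + 1) * EX_PAGE_SIZE - 1;

	/* Page already cached: skip the cursor to the next page start. */
	uint64_t skip_to = (pg_index + 1) * EX_PAGE_SIZE;

	/* Page added: cover up to the extent end or page end, whichever comes first. */
	uint64_t add_size = min_u64(em_end, page_end + 1) - cur;

	printf("skip to %llu, or add %llu bytes\n",
	       (unsigned long long)skip_to, (unsigned long long)add_size);
	return 0;
}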
@@ -681,9 +799,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||||||
unsigned int compressed_len;
|
unsigned int compressed_len;
|
||||||
unsigned int nr_pages;
|
unsigned int nr_pages;
|
||||||
unsigned int pg_index;
|
unsigned int pg_index;
|
||||||
struct page *page;
|
struct bio *comp_bio = NULL;
|
||||||
struct bio *comp_bio;
|
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
|
||||||
u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
|
u64 cur_disk_byte = disk_bytenr;
|
||||||
|
u64 next_stripe_start;
|
||||||
u64 file_offset;
|
u64 file_offset;
|
||||||
u64 em_len;
|
u64 em_len;
|
||||||
u64 em_start;
|
u64 em_start;
|
||||||
@@ -710,7 +829,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||||||
if (!cb)
|
if (!cb)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
refcount_set(&cb->pending_bios, 0);
|
refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
|
||||||
cb->errors = 0;
|
cb->errors = 0;
|
||||||
cb->inode = inode;
|
cb->inode = inode;
|
||||||
cb->mirror_num = mirror_num;
|
cb->mirror_num = mirror_num;
|
||||||
@@ -750,86 +869,74 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
|||||||
/* include any pages we added in add_ra-bio_pages */
|
/* include any pages we added in add_ra-bio_pages */
|
||||||
cb->len = bio->bi_iter.bi_size;
|
cb->len = bio->bi_iter.bi_size;
|
||||||
|
|
||||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
while (cur_disk_byte < disk_bytenr + compressed_len) {
|
||||||
comp_bio->bi_opf = REQ_OP_READ;
|
u64 offset = cur_disk_byte - disk_bytenr;
|
||||||
comp_bio->bi_private = cb;
|
unsigned int index = offset >> PAGE_SHIFT;
|
||||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
unsigned int real_size;
|
||||||
refcount_set(&cb->pending_bios, 1);
|
unsigned int added;
|
||||||
|
struct page *page = cb->compressed_pages[index];
|
||||||
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
|
bool submit = false;
|
||||||
u32 pg_len = PAGE_SIZE;
|
|
||||||
int submit = 0;
|
|
||||||
|
|
||||||
|
/* Allocate new bio if submitted or not yet allocated */
|
||||||
|
if (!comp_bio) {
|
||||||
|
comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
|
||||||
|
REQ_OP_READ, end_compressed_bio_read,
|
||||||
|
&next_stripe_start);
|
||||||
|
if (IS_ERR(comp_bio)) {
|
||||||
|
ret = errno_to_blk_status(PTR_ERR(comp_bio));
|
||||||
|
comp_bio = NULL;
|
||||||
|
goto finish_cb;
|
||||||
|
}
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* To handle subpage case, we need to make sure the bio only
|
* We should never reach next_stripe_start as we will
|
||||||
* covers the range we need.
|
* submit comp_bio when we reach the boundary.
|
||||||
*
|
|
||||||
* If we're at the last page, truncate the length to only cover
|
|
||||||
* the remaining part.
|
|
||||||
*/
|
*/
|
||||||
if (pg_index == nr_pages - 1)
|
ASSERT(cur_disk_byte != next_stripe_start);
|
||||||
pg_len = min_t(u32, PAGE_SIZE,
|
/*
|
||||||
compressed_len - pg_index * PAGE_SIZE);
|
* We have various limits on the real read size:
|
||||||
|
* - stripe boundary
|
||||||
|
* - page boundary
|
||||||
|
* - compressed length boundary
|
||||||
|
*/
|
||||||
|
real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
|
||||||
|
real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
|
||||||
|
real_size = min_t(u64, real_size, compressed_len - offset);
|
||||||
|
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
|
||||||
|
|
||||||
page = cb->compressed_pages[pg_index];
|
added = bio_add_page(comp_bio, page, real_size, offset_in_page(offset));
|
||||||
page->mapping = inode->i_mapping;
|
/*
|
||||||
page->index = em_start >> PAGE_SHIFT;
|
* Maximum compressed extent is smaller than bio size limit,
|
||||||
|
* thus bio_add_page() should always succeed.
|
||||||
|
*/
|
||||||
|
ASSERT(added == real_size);
|
||||||
|
cur_disk_byte += added;
|
||||||
|
|
||||||
if (comp_bio->bi_iter.bi_size)
|
/* Reached stripe boundary, need to submit */
|
||||||
submit = btrfs_bio_fits_in_stripe(page, pg_len,
|
if (cur_disk_byte == next_stripe_start)
|
||||||
comp_bio, 0);
|
submit = true;
|
||||||
|
|
||||||
page->mapping = NULL;
|
/* Has finished the range, need to submit */
|
||||||
if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
|
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||||
|
submit = true;
|
||||||
|
|
||||||
|
if (submit) {
|
||||||
unsigned int nr_sectors;
|
unsigned int nr_sectors;
|
||||||
|
|
||||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
|
|
||||||
BTRFS_WQ_ENDIO_DATA);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* inc the count before we submit the bio so
|
|
||||||
* we know the end IO handler won't happen before
|
|
||||||
* we inc the count. Otherwise, the cb might get
|
|
||||||
* freed before we're done setting it up
|
|
||||||
*/
|
|
||||||
refcount_inc(&cb->pending_bios);
|
|
||||||
|
|
||||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
if (ret)
|
||||||
|
goto finish_cb;
|
||||||
|
|
||||||
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
||||||
fs_info->sectorsize);
|
fs_info->sectorsize);
|
||||||
sums += fs_info->csum_size * nr_sectors;
|
sums += fs_info->csum_size * nr_sectors;
|
||||||
|
|
||||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
|
ret = submit_compressed_bio(fs_info, cb, comp_bio, mirror_num);
|
||||||
if (ret) {
|
if (ret)
|
||||||
comp_bio->bi_status = ret;
|
goto finish_cb;
|
||||||
bio_endio(comp_bio);
|
comp_bio = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
|
||||||
comp_bio->bi_opf = REQ_OP_READ;
|
|
||||||
comp_bio->bi_private = cb;
|
|
||||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
|
||||||
|
|
||||||
bio_add_page(comp_bio, page, pg_len, 0);
|
|
||||||
}
|
}
|
||||||
cur_disk_byte += pg_len;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
|
|
||||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
|
|
||||||
BUG_ON(ret); /* -ENOMEM */
|
|
||||||
|
|
||||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
|
|
||||||
if (ret) {
|
|
||||||
comp_bio->bi_status = ret;
|
|
||||||
bio_endio(comp_bio);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
fail2:
|
fail2:
|
||||||
@@ -844,6 +951,26 @@ fail1:
|
|||||||
out:
|
out:
|
||||||
free_extent_map(em);
|
free_extent_map(em);
|
||||||
return ret;
|
return ret;
|
||||||
|
finish_cb:
|
||||||
|
if (comp_bio) {
|
||||||
|
comp_bio->bi_status = ret;
|
||||||
|
bio_endio(comp_bio);
|
||||||
|
}
|
||||||
|
/* All bytes of @cb are submitted, endio will free @cb */
|
||||||
|
if (cur_disk_byte == disk_bytenr + compressed_len)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
|
||||||
|
(disk_bytenr + compressed_len - cur_disk_byte) >>
|
||||||
|
fs_info->sectorsize_bits);
|
||||||
|
/*
|
||||||
|
* Even with previous bio ended, we should still have io not yet
|
||||||
|
* submitted, thus need to finish @cb manually.
|
||||||
|
*/
|
||||||
|
ASSERT(refcount_read(&cb->pending_sectors));
|
||||||
|
/* Now we are the only one referring @cb, can finish it safely. */
|
||||||
|
finish_compressed_bio_read(cb, NULL);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
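Both submit paths above gained a finish_cb error path that cannot free the compressed_bio immediately, because earlier bios may still be in flight: they wait with wait_var_event() until pending_sectors equals exactly the number of sectors that were never submitted, and only then finish the cb by hand. A minimal sketch of that wait, assuming a kernel context; the example_* names are hypothetical:

#include <linux/refcount.h>
#include <linux/wait_bit.h>

struct example_cb {
	refcount_t pending_sectors;
};

/*
 * Wait until all submitted sectors have completed, i.e. until the refcount
 * equals the number of sectors that were never handed to the block layer.
 */
static void example_wait_submitted_io(struct example_cb *cb,
				      unsigned int unsubmitted_sectors)
{
	wait_var_event(cb, refcount_read(&cb->pending_sectors) == unsubmitted_sectors);
}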
@@ -28,8 +28,8 @@ struct btrfs_inode;
|
|||||||
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
||||||
|
|
||||||
struct compressed_bio {
|
struct compressed_bio {
|
||||||
/* number of bios pending for this compressed extent */
|
/* Number of sectors with unfinished IO (unsubmitted or unfinished) */
|
||||||
refcount_t pending_bios;
|
refcount_t pending_sectors;
|
||||||
|
|
||||||
/* Number of compressed pages in the array */
|
/* Number of compressed pages in the array */
|
||||||
unsigned int nr_pages;
|
unsigned int nr_pages;
|
||||||
|
|||||||
156	fs/btrfs/ctree.c
@@ -396,7 +396,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
|
|||||||
if (*cow_ret == buf)
|
if (*cow_ret == buf)
|
||||||
unlock_orig = 1;
|
unlock_orig = 1;
|
||||||
|
|
||||||
btrfs_assert_tree_locked(buf);
|
btrfs_assert_tree_write_locked(buf);
|
||||||
|
|
||||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||||
trans->transid != fs_info->running_transaction->transid);
|
trans->transid != fs_info->running_transaction->transid);
|
||||||
@@ -2488,7 +2488,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
BUG_ON(!path->nodes[level]);
|
BUG_ON(!path->nodes[level]);
|
||||||
btrfs_assert_tree_locked(path->nodes[level]);
|
btrfs_assert_tree_write_locked(path->nodes[level]);
|
||||||
lower = path->nodes[level];
|
lower = path->nodes[level];
nritems = btrfs_header_nritems(lower);
BUG_ON(slot > nritems);
@@ -2828,7 +2828,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (slot >= btrfs_header_nritems(upper) - 1)
return 1;

-btrfs_assert_tree_locked(path->nodes[1]);
+btrfs_assert_tree_write_locked(path->nodes[1]);

right = btrfs_read_node_slot(upper, slot + 1);
/*
@@ -3066,7 +3066,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (right_nritems == 0)
return 1;

-btrfs_assert_tree_locked(path->nodes[1]);
+btrfs_assert_tree_write_locked(path->nodes[1]);

left = btrfs_read_node_slot(path->nodes[1], slot - 1);
/*
@@ -3581,40 +3581,6 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
return ret;
}

-/*
- * This function duplicate a item, giving 'new_key' to the new item.
- * It guarantees both items live in the same tree leaf and the new item
- * is contiguous with the original item.
- *
- * This allows us to split file extent in place, keeping a lock on the
- * leaf the entire time.
- */
-int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
-struct btrfs_root *root,
-struct btrfs_path *path,
-const struct btrfs_key *new_key)
-{
-struct extent_buffer *leaf;
-int ret;
-u32 item_size;
-
-leaf = path->nodes[0];
-item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-ret = setup_leaf_for_split(trans, root, path,
-item_size + sizeof(struct btrfs_item));
-if (ret)
-return ret;
-
-path->slots[0]++;
-setup_items_for_insert(root, path, new_key, &item_size, 1);
-leaf = path->nodes[0];
-memcpy_extent_buffer(leaf,
-btrfs_item_ptr_offset(leaf, path->slots[0]),
-btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
-item_size);
-return 0;
-}
-
/*
* make the item pointed to by the path smaller. new_size indicates
* how small to make it, and from_end tells us if we just chop bytes
@@ -3786,13 +3752,10 @@ void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
*
* @root: root we are inserting items to
* @path: points to the leaf/slot where we are going to insert new items
-* @cpu_key: array of keys for items to be inserted
-* @data_size: size of the body of each item we are going to insert
-* @nr: size of @cpu_key/@data_size arrays
+* @batch: information about the batch of items to insert
*/
-void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
-const struct btrfs_key *cpu_key, u32 *data_size,
-int nr)
+static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
+const struct btrfs_item_batch *batch)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_item *item;
@@ -3804,14 +3767,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int slot;
struct btrfs_map_token token;
u32 total_size;
-u32 total_data = 0;

-for (i = 0; i < nr; i++)
-total_data += data_size[i];
-total_size = total_data + (nr * sizeof(struct btrfs_item));

+/*
+ * Before anything else, update keys in the parent and other ancestors
+ * if needed, then release the write locks on them, so that other tasks
+ * can use them while we modify the leaf.
+ */
if (path->slots[0] == 0) {
-btrfs_cpu_key_to_disk(&disk_key, cpu_key);
+btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
fixup_low_keys(path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
@@ -3821,6 +3784,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,

nritems = btrfs_header_nritems(leaf);
data_end = leaf_data_end(leaf);
+total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));

if (btrfs_leaf_free_space(leaf) < total_size) {
btrfs_print_leaf(leaf);
@@ -3850,31 +3814,32 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(&token, item);
btrfs_set_token_item_offset(&token, item,
-ioff - total_data);
+ioff - batch->total_data_size);
}
/* shift the items */
-memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
+memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
btrfs_item_nr_offset(slot),
(nritems - slot) * sizeof(struct btrfs_item));

/* shift the data */
memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
-data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
-data_end, old_data - data_end);
+data_end - batch->total_data_size,
+BTRFS_LEAF_DATA_OFFSET + data_end,
+old_data - data_end);
data_end = old_data;
}

/* setup the item for the new data */
-for (i = 0; i < nr; i++) {
-btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
+for (i = 0; i < batch->nr; i++) {
+btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
btrfs_set_item_key(leaf, &disk_key, slot + i);
item = btrfs_item_nr(slot + i);
-data_end -= data_size[i];
+data_end -= batch->data_sizes[i];
btrfs_set_token_item_offset(&token, item, data_end);
-btrfs_set_token_item_size(&token, item, data_size[i]);
+btrfs_set_token_item_size(&token, item, batch->data_sizes[i]);
}

-btrfs_set_header_nritems(leaf, nritems + nr);
+btrfs_set_header_nritems(leaf, nritems + batch->nr);
btrfs_mark_buffer_dirty(leaf);

if (btrfs_leaf_free_space(leaf) < 0) {
@@ -3883,6 +3848,29 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
}
}

+/*
+ * Insert a new item into a leaf.
+ *
+ * @root:      The root of the btree.
+ * @path:      A path pointing to the target leaf and slot.
+ * @key:       The key of the new item.
+ * @data_size: The size of the data associated with the new key.
+ */
+void btrfs_setup_item_for_insert(struct btrfs_root *root,
+struct btrfs_path *path,
+const struct btrfs_key *key,
+u32 data_size)
+{
+struct btrfs_item_batch batch;

+batch.keys = key;
+batch.data_sizes = &data_size;
+batch.total_data_size = data_size;
+batch.nr = 1;

+setup_items_for_insert(root, path, &batch);
+}

/*
* Given a key and some data, insert items into the tree.
* This does all the path init required, making room in the tree if needed.
@@ -3890,20 +3878,14 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
-const struct btrfs_key *cpu_key, u32 *data_size,
-int nr)
+const struct btrfs_item_batch *batch)
{
int ret = 0;
int slot;
-int i;
-u32 total_size = 0;
-u32 total_data = 0;
+u32 total_size;

-for (i = 0; i < nr; i++)
-total_data += data_size[i];
+total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
+ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);

-total_size = total_data + (nr * sizeof(struct btrfs_item));
-ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
if (ret == 0)
return -EEXIST;
if (ret < 0)
@@ -3912,7 +3894,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
slot = path->slots[0];
BUG_ON(slot < 0);

-setup_items_for_insert(root, path, cpu_key, data_size, nr);
+setup_items_for_insert(root, path, batch);
return 0;
}

@@ -3943,6 +3925,40 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ret;
}

+/*
+ * This function duplicates an item, giving 'new_key' to the new item.
+ * It guarantees both items live in the same tree leaf and the new item is
+ * contiguous with the original item.
+ *
+ * This allows us to split a file extent in place, keeping a lock on the leaf
+ * the entire time.
+ */
+int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
+struct btrfs_root *root,
+struct btrfs_path *path,
+const struct btrfs_key *new_key)
+{
+struct extent_buffer *leaf;
+int ret;
+u32 item_size;

+leaf = path->nodes[0];
+item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ret = setup_leaf_for_split(trans, root, path,
+item_size + sizeof(struct btrfs_item));
+if (ret)
+return ret;

+path->slots[0]++;
+btrfs_setup_item_for_insert(root, path, new_key, item_size);
+leaf = path->nodes[0];
+memcpy_extent_buffer(leaf,
+btrfs_item_ptr_offset(leaf, path->slots[0]),
+btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
+item_size);
+return 0;
+}

/*
* delete the pointer from a given node.
*
@@ -48,6 +48,7 @@ extern struct kmem_cache *btrfs_free_space_cachep;
extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
struct btrfs_ordered_sum;
struct btrfs_ref;
+struct btrfs_bio;

#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */

@@ -217,6 +218,9 @@ struct btrfs_root_backup {
u8 unused_8[10];
} __attribute__ ((__packed__));

+#define BTRFS_SUPER_INFO_OFFSET SZ_64K
+#define BTRFS_SUPER_INFO_SIZE 4096

/*
* the super block basically lists the main trees of the FS
* it currently lacks any block count etc etc
@@ -269,7 +273,11 @@ struct btrfs_super_block {
__le64 reserved[28];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];

+/* Padded to 4096 bytes */
+u8 padding[565];
} __attribute__ ((__packed__));
+static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);

/*
* Compat flags that we support. If any incompat flags are set other than the
@@ -899,6 +907,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *scrub_workers;
struct btrfs_workqueue *scrub_wr_completion_workers;
struct btrfs_workqueue *scrub_parity_workers;
+struct btrfs_subpage_info *subpage_info;

struct btrfs_discard_ctl discard_ctl;

@@ -1017,6 +1026,16 @@ struct btrfs_fs_info {
spinlock_t treelog_bg_lock;
u64 treelog_bg;

+/*
+ * Start of the dedicated data relocation block group, protected by
+ * relocation_bg_lock.
+ */
+spinlock_t relocation_bg_lock;
+u64 data_reloc_bg;

+spinlock_t zone_active_bgs_lock;
+struct list_head zone_active_bgs;

#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@@ -2885,16 +2904,42 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
return btrfs_del_items(trans, root, path, path->slots[0], 1);
}

-void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
-const struct btrfs_key *cpu_key, u32 *data_size,
-int nr);
+/*
+ * Describes a batch of items to insert in a btree. This is used by
+ * btrfs_insert_empty_items().
+ */
+struct btrfs_item_batch {
+/*
+ * Pointer to an array containing the keys of the items to insert (in
+ * sorted order).
+ */
+const struct btrfs_key *keys;
+/* Pointer to an array containing the data size for each item to insert. */
+const u32 *data_sizes;
+/*
+ * The sum of data sizes for all items. The caller can compute this while
+ * setting up the data_sizes array, so it ends up being more efficient
+ * than having btrfs_insert_empty_items() or setup_item_for_insert()
+ * doing it, as it would avoid an extra loop over a potentially large
+ * array, and in the case of setup_item_for_insert(), we would be doing
+ * it while holding a write lock on a leaf and often on upper level nodes
+ * too, unnecessarily increasing the size of a critical section.
+ */
+u32 total_data_size;
+/* Size of the keys and data_sizes arrays (number of items in the batch). */
+int nr;
+};

+void btrfs_setup_item_for_insert(struct btrfs_root *root,
+struct btrfs_path *path,
+const struct btrfs_key *key,
+u32 data_size);
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, void *data, u32 data_size);
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
-const struct btrfs_key *cpu_key, u32 *data_size,
-int nr);
+const struct btrfs_item_batch *batch);

static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -2902,7 +2947,14 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
const struct btrfs_key *key,
u32 data_size)
{
-return btrfs_insert_empty_items(trans, root, path, key, &data_size, 1);
+struct btrfs_item_batch batch;

+batch.keys = key;
+batch.data_sizes = &data_size;
+batch.total_data_size = data_size;
+batch.nr = 1;

+return btrfs_insert_empty_items(trans, root, path, &batch);
}

int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
@@ -3129,8 +3181,9 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path);
/* inode.c */
blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
-unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
-struct page *page, u64 start, u64 end);
+unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
+u32 bio_offset, struct page *page,
+u64 start, u64 end);
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
@@ -3142,7 +3195,6 @@ void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
-struct btrfs_root *root,
struct btrfs_inode *dir, struct btrfs_inode *inode,
const char *name, int name_len);
int btrfs_add_link(struct btrfs_trans_handle *trans,
@@ -3174,8 +3226,6 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
struct extent_state *other);
void btrfs_split_delalloc_extent(struct inode *inode,
struct extent_state *orig, u64 split);
-int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
-unsigned long bio_flags);
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
@@ -3242,9 +3292,9 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(u8 *uuid);
-int btrfs_defrag_file(struct inode *inode, struct file *file,
+int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
struct btrfs_ioctl_defrag_range_args *range,
-u64 newer_than, unsigned long max_pages);
+u64 newer_than, unsigned long max_to_defrag);
void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
@@ -3563,6 +3613,9 @@ do { \
(errno), fmt, ##args); \
} while (0)

+#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
+&(fs_info)->fs_state)))

__printf(5, 6)
__cold
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
@@ -3842,6 +3895,11 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
return fs_info->zoned != 0;
}

+static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+{
+return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+}

/*
* We use page status Private2 to indicate there is an ordered extent with
* unfinished IO.
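Note: the btrfs_item_batch structure and the new btrfs_insert_empty_items() prototype added above replace the old (cpu_key, data_size, nr) argument triplet. As a rough, hypothetical sketch of a caller filling such a batch (not code from this series), inserting two empty items into one leaf would look roughly like:

	struct btrfs_key keys[2];	/* must already be in sorted order */
	u32 sizes[2] = { 16, 32 };	/* data size of each item */
	struct btrfs_item_batch batch;
	int ret;

	batch.keys = keys;
	batch.data_sizes = sizes;
	/* the caller precomputes the sum so the helpers avoid an extra loop */
	batch.total_data_size = sizes[0] + sizes[1];
	batch.nr = 2;

	ret = btrfs_insert_empty_items(trans, root, path, &batch);

The single-item helpers (btrfs_setup_item_for_insert() and the btrfs_insert_empty_item() inline) build the same one-element batch internally, as the hunks above show.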
@@ -679,19 +679,18 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *first_item)
{
-LIST_HEAD(batch);
+LIST_HEAD(item_list);
struct btrfs_delayed_item *curr;
struct btrfs_delayed_item *next;
const int max_size = BTRFS_LEAF_DATA_SIZE(root->fs_info);
+struct btrfs_item_batch batch;
int total_size;
-int nitems;
char *ins_data = NULL;
-struct btrfs_key *ins_keys;
-u32 *ins_sizes;
int ret;

-list_add_tail(&first_item->tree_list, &batch);
-nitems = 1;
+list_add_tail(&first_item->tree_list, &item_list);
+batch.total_data_size = first_item->data_len;
+batch.nr = 1;
total_size = first_item->data_len + sizeof(struct btrfs_item);
curr = first_item;

@@ -706,39 +705,43 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
if (total_size + next_size > max_size)
break;

-list_add_tail(&next->tree_list, &batch);
-nitems++;
+list_add_tail(&next->tree_list, &item_list);
+batch.nr++;
total_size += next_size;
+batch.total_data_size += next->data_len;
curr = next;
}

-if (nitems == 1) {
-ins_keys = &first_item->key;
-ins_sizes = &first_item->data_len;
+if (batch.nr == 1) {
+batch.keys = &first_item->key;
+batch.data_sizes = &first_item->data_len;
} else {
+struct btrfs_key *ins_keys;
+u32 *ins_sizes;
int i = 0;

-ins_data = kmalloc(nitems * sizeof(u32) +
-nitems * sizeof(struct btrfs_key), GFP_NOFS);
+ins_data = kmalloc(batch.nr * sizeof(u32) +
+batch.nr * sizeof(struct btrfs_key), GFP_NOFS);
if (!ins_data) {
ret = -ENOMEM;
goto out;
}
ins_sizes = (u32 *)ins_data;
-ins_keys = (struct btrfs_key *)(ins_data + nitems * sizeof(u32));
-list_for_each_entry(curr, &batch, tree_list) {
+ins_keys = (struct btrfs_key *)(ins_data + batch.nr * sizeof(u32));
+batch.keys = ins_keys;
+batch.data_sizes = ins_sizes;
+list_for_each_entry(curr, &item_list, tree_list) {
ins_keys[i] = curr->key;
ins_sizes[i] = curr->data_len;
i++;
}
}

-ret = btrfs_insert_empty_items(trans, root, path, ins_keys, ins_sizes,
-nitems);
+ret = btrfs_insert_empty_items(trans, root, path, &batch);
if (ret)
goto out;

-list_for_each_entry(curr, &batch, tree_list) {
+list_for_each_entry(curr, &item_list, tree_list) {
char *data_ptr;

data_ptr = btrfs_item_ptr(path->nodes[0], path->slots[0], char);
@@ -754,7 +757,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
*/
btrfs_release_path(path);

-list_for_each_entry_safe(curr, next, &batch, tree_list) {
+list_for_each_entry_safe(curr, next, &item_list, tree_list) {
list_del(&curr->tree_list);
btrfs_delayed_item_release_metadata(root, curr);
btrfs_release_delayed_item(curr);
@@ -906,7 +906,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
u64 parent = generic_ref->parent;
u8 ref_type;

-is_system = (generic_ref->real_root == BTRFS_CHUNK_TREE_OBJECTID);
+is_system = (generic_ref->tree_ref.owning_root == BTRFS_CHUNK_TREE_OBJECTID);

ASSERT(generic_ref->type == BTRFS_REF_METADATA && generic_ref->action);
BUG_ON(extent_op && extent_op->is_data);
@@ -921,8 +921,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
}

if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-is_fstree(generic_ref->real_root) &&
-is_fstree(generic_ref->tree_ref.root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
@@ -938,14 +936,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
ref_type = BTRFS_TREE_BLOCK_REF_KEY;

init_delayed_ref_common(fs_info, &ref->node, bytenr, num_bytes,
-generic_ref->tree_ref.root, action, ref_type);
-ref->root = generic_ref->tree_ref.root;
+generic_ref->tree_ref.owning_root, action,
+ref_type);
+ref->root = generic_ref->tree_ref.owning_root;
ref->parent = parent;
ref->level = level;

init_delayed_ref_head(head_ref, record, bytenr, num_bytes,
-generic_ref->tree_ref.root, 0, action, false,
-is_system);
+generic_ref->tree_ref.owning_root, 0, action,
+false, is_system);
head_ref->extent_op = extent_op;

delayed_refs = &trans->transaction->delayed_refs;
@@ -997,7 +996,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
-u64 ref_root = generic_ref->data_ref.ref_root;
+u64 ref_root = generic_ref->data_ref.owning_root;
u64 owner = generic_ref->data_ref.ino;
u64 offset = generic_ref->data_ref.offset;
u8 ref_type;
@@ -1026,8 +1025,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
}

if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
-is_fstree(ref_root) &&
-is_fstree(generic_ref->real_root) &&
!generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
if (!record) {
@@ -186,8 +186,8 @@ enum btrfs_ref_type {
struct btrfs_data_ref {
/* For EXTENT_DATA_REF */

-/* Root which refers to this data extent */
-u64 ref_root;
+/* Original root this data extent belongs to */
+u64 owning_root;

/* Inode which refers to this data extent */
u64 ino;
@@ -210,11 +210,11 @@ struct btrfs_tree_ref {
int level;

/*
-* Root which refers to this tree block.
+* Root which owns this tree block.
*
* For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
*/
-u64 root;
+u64 owning_root;

/* For non-skinny metadata, no special member needed */
};
@@ -231,17 +231,10 @@ struct btrfs_ref {
*/
bool skip_qgroup;

-/*
-* Optional. For which root is this modification.
-* Mostly used for qgroup optimization.
-*
-* When unset, data/tree ref init code will populate it.
-* In certain cases, we're modifying reference for a different root.
-* E.g. COW fs tree blocks for balance.
-* In that case, tree_ref::root will be fs tree, but we're doing this
-* for reloc tree, then we should set @real_root to reloc tree.
-*/
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
+/* Through which root is this modification. */
u64 real_root;
+#endif
u64 bytenr;
u64 len;

@@ -271,26 +264,40 @@ static inline void btrfs_init_generic_ref(struct btrfs_ref *generic_ref,
}

static inline void btrfs_init_tree_ref(struct btrfs_ref *generic_ref,
-int level, u64 root)
+int level, u64 root, u64 mod_root, bool skip_qgroup)
{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* If @real_root not set, use @root as fallback */
-if (!generic_ref->real_root)
-generic_ref->real_root = root;
+generic_ref->real_root = mod_root ?: root;
+#endif
generic_ref->tree_ref.level = level;
-generic_ref->tree_ref.root = root;
+generic_ref->tree_ref.owning_root = root;
generic_ref->type = BTRFS_REF_METADATA;
+if (skip_qgroup || !(is_fstree(root) &&
+(!mod_root || is_fstree(mod_root))))
+generic_ref->skip_qgroup = true;
+else
+generic_ref->skip_qgroup = false;

}

static inline void btrfs_init_data_ref(struct btrfs_ref *generic_ref,
-u64 ref_root, u64 ino, u64 offset)
+u64 ref_root, u64 ino, u64 offset, u64 mod_root,
+bool skip_qgroup)
{
+#ifdef CONFIG_BTRFS_FS_REF_VERIFY
/* If @real_root not set, use @root as fallback */
-if (!generic_ref->real_root)
-generic_ref->real_root = ref_root;
-generic_ref->data_ref.ref_root = ref_root;
+generic_ref->real_root = mod_root ?: ref_root;
+#endif
+generic_ref->data_ref.owning_root = ref_root;
generic_ref->data_ref.ino = ino;
generic_ref->data_ref.offset = offset;
generic_ref->type = BTRFS_REF_DATA;
+if (skip_qgroup || !(is_fstree(ref_root) &&
+(!mod_root || is_fstree(mod_root))))
+generic_ref->skip_qgroup = true;
+else
+generic_ref->skip_qgroup = false;
}

static inline struct btrfs_delayed_extent_op *
@@ -70,6 +70,7 @@ static int btrfs_dev_replace_kthread(void *data);

int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
{
+struct btrfs_dev_lookup_args args = { .devid = BTRFS_DEV_REPLACE_DEVID };
struct btrfs_key key;
struct btrfs_root *dev_root = fs_info->dev_root;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
@@ -100,8 +101,7 @@ no_valid_dev_replace_entry_found:
* We don't have a replace item or it's corrupted. If there is
* a replace target, fail the mount.
*/
-if (btrfs_find_device(fs_info->fs_devices,
-BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
+if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
"found replace target device without a valid replace item");
ret = -EUCLEAN;
@@ -163,8 +163,7 @@ no_valid_dev_replace_entry_found:
* We don't have an active replace item but if there is a
* replace target, fail the mount.
*/
-if (btrfs_find_device(fs_info->fs_devices,
-BTRFS_DEV_REPLACE_DEVID, NULL, NULL)) {
+if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
"replace devid present without an active replace item");
ret = -EUCLEAN;
@@ -175,11 +174,10 @@ no_valid_dev_replace_entry_found:
break;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
-dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices,
-src_devid, NULL, NULL);
-dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices,
-BTRFS_DEV_REPLACE_DEVID,
-NULL, NULL);
+dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, &args);
+args.devid = src_devid;
+dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, &args);
/*
* allow 'btrfs dev replace_cancel' if src/tgt device is
* missing
@@ -683,7 +683,7 @@ err:
return ret;
}

-int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
+int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror)
{
@@ -1036,7 +1036,7 @@ static int btree_set_page_dirty(struct page *page)
BUG_ON(!eb);
BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
BUG_ON(!atomic_read(&eb->refs));
-btrfs_assert_tree_locked(eb);
+btrfs_assert_tree_write_locked(eb);
return __set_page_dirty_nobuffers(page);
}
ASSERT(PagePrivate(page) && page->private);
@@ -1061,7 +1061,7 @@ static int btree_set_page_dirty(struct page *page)
ASSERT(eb);
ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
ASSERT(atomic_read(&eb->refs));
-btrfs_assert_tree_locked(eb);
+btrfs_assert_tree_write_locked(eb);
free_extent_buffer(eb);

cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
@@ -1125,7 +1125,7 @@ void btrfs_clean_tree_block(struct extent_buffer *buf)
struct btrfs_fs_info *fs_info = buf->fs_info;
if (btrfs_header_generation(buf) ==
fs_info->running_transaction->transid) {
-btrfs_assert_tree_locked(buf);
+btrfs_assert_tree_write_locked(buf);

if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
@@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
goto fail;

if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
-root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+!btrfs_is_data_reloc_root(root)) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
btrfs_extent_buffer_leak_debug_check(fs_info);
kfree(fs_info->super_copy);
kfree(fs_info->super_for_commit);
+kfree(fs_info->subpage_info);
kvfree(fs_info);
}

@@ -1953,8 +1954,7 @@ sleep:
wake_up_process(fs_info->cleaner_kthread);
mutex_unlock(&fs_info->transaction_kthread_mutex);

-if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
-&fs_info->fs_state)))
+if (BTRFS_FS_ERROR(fs_info))
btrfs_cleanup_transaction(fs_info);
if (!kthread_should_stop() &&
(!btrfs_transaction_blocked(fs_info) ||
@@ -2592,8 +2592,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,

/*
* For 4K page size, we only support 4K sector size.
-* For 64K page size, we support read-write for 64K sector size, and
-* read-only for 4K sector size.
+* For 64K page size, we support 64K and 4K sector sizes.
*/
if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) ||
(PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K &&
@@ -2883,6 +2882,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->buffer_lock);
spin_lock_init(&fs_info->unused_bgs_lock);
spin_lock_init(&fs_info->treelog_bg_lock);
+spin_lock_init(&fs_info->zone_active_bgs_lock);
+spin_lock_init(&fs_info->relocation_bg_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->unused_bg_unpin_mutex);
mutex_init(&fs_info->reclaim_bgs_lock);
@@ -2896,6 +2897,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
INIT_LIST_HEAD(&fs_info->unused_bgs);
INIT_LIST_HEAD(&fs_info->reclaim_bgs);
+INIT_LIST_HEAD(&fs_info->zone_active_bgs);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&fs_info->allocated_roots);
INIT_LIST_HEAD(&fs_info->allocated_ebs);
@@ -3228,12 +3230,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
btrfs_init_btree_inode(fs_info);

-invalidate_bdev(fs_devices->latest_bdev);
+invalidate_bdev(fs_devices->latest_dev->bdev);

/*
* Read super block and check the signature bytes only
*/
-disk_super = btrfs_read_dev_super(fs_devices->latest_bdev);
+disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
if (IS_ERR(disk_super)) {
err = PTR_ERR(disk_super);
goto fail_alloc;
@@ -3392,12 +3394,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}

-if (sectorsize != PAGE_SIZE) {
+if (sectorsize < PAGE_SIZE) {
+struct btrfs_subpage_info *subpage_info;

btrfs_warn(fs_info,
"read-write for sector size %u with page size %lu is experimental",
sectorsize, PAGE_SIZE);
-}
-if (sectorsize != PAGE_SIZE) {
if (btrfs_super_incompat_flags(fs_info->super_copy) &
BTRFS_FEATURE_INCOMPAT_RAID56) {
btrfs_err(fs_info,
@@ -3406,6 +3408,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
err = -EINVAL;
goto fail_alloc;
}
+subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
+if (!subpage_info)
+goto fail_alloc;
+btrfs_init_subpage_info(subpage_info, sectorsize);
+fs_info->subpage_info = subpage_info;
}

ret = btrfs_init_workqueues(fs_info, fs_devices);
@@ -3465,7 +3472,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* below in btrfs_init_dev_replace().
*/
btrfs_free_extra_devids(fs_devices);
-if (!fs_devices->latest_bdev) {
+if (!fs_devices->latest_dev->bdev) {
btrfs_err(fs_info, "failed to read devices");
goto fail_tree_roots;
}
@@ -3556,7 +3563,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs;
}

-if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
+if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
+!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
"writable mount is not allowed due to too many missing devices");
goto fail_sysfs;
@@ -3881,7 +3889,9 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_opf |= REQ_FUA;

btrfsic_submit_bio(bio);
-btrfs_advance_sb_log(device, i);
+if (btrfs_advance_sb_log(device, i))
+errors++;
}
return errors < i ? 0 : -1;
}
@@ -4221,7 +4231,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
drop_ref = true;
spin_unlock(&fs_info->fs_roots_radix_lock);

-if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+if (BTRFS_FS_ERROR(fs_info)) {
ASSERT(root->log_root == NULL);
if (root->reloc_root) {
btrfs_put_root(root->reloc_root);
@@ -4372,8 +4382,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info, "commit super ret %d", ret);
}

-if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) ||
-test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state))
+if (BTRFS_FS_ERROR(fs_info))
btrfs_error_commit_super(fs_info);

kthread_stop(fs_info->transaction_kthread);
@@ -4470,7 +4479,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
return;
#endif
-btrfs_assert_tree_locked(buf);
+btrfs_assert_tree_write_locked(buf);
if (transid != fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
buf->start, transid, fs_info->generation);
@@ -6,9 +6,6 @@
#ifndef BTRFS_DISK_IO_H
#define BTRFS_DISK_IO_H

-#define BTRFS_SUPER_INFO_OFFSET SZ_64K
-#define BTRFS_SUPER_INFO_SIZE 4096

#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12

@@ -81,7 +78,7 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
-int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio,
+int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror);
blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
@@ -1266,7 +1266,7 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int do_discard_extent(struct btrfs_bio_stripe *stripe, u64 *bytes)
|
static int do_discard_extent(struct btrfs_io_stripe *stripe, u64 *bytes)
|
||||||
{
|
{
|
||||||
struct btrfs_device *dev = stripe->dev;
|
struct btrfs_device *dev = stripe->dev;
|
||||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||||
@@ -1313,22 +1313,21 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||||||
u64 discarded_bytes = 0;
|
u64 discarded_bytes = 0;
|
||||||
u64 end = bytenr + num_bytes;
|
u64 end = bytenr + num_bytes;
|
||||||
u64 cur = bytenr;
|
u64 cur = bytenr;
|
||||||
struct btrfs_bio *bbio = NULL;
|
struct btrfs_io_context *bioc = NULL;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Avoid races with device replace and make sure our bbio has devices
|
* Avoid races with device replace and make sure our bioc has devices
|
||||||
* associated to its stripes that don't go away while we are discarding.
|
* associated to its stripes that don't go away while we are discarding.
|
||||||
*/
|
*/
|
||||||
btrfs_bio_counter_inc_blocked(fs_info);
|
btrfs_bio_counter_inc_blocked(fs_info);
|
||||||
while (cur < end) {
|
while (cur < end) {
|
||||||
struct btrfs_bio_stripe *stripe;
|
struct btrfs_io_stripe *stripe;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
num_bytes = end - cur;
|
num_bytes = end - cur;
|
||||||
/* Tell the block device(s) that the sectors can be discarded */
|
/* Tell the block device(s) that the sectors can be discarded */
|
||||||
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
|
ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
|
||||||
&num_bytes, &bbio, 0);
|
&num_bytes, &bioc, 0);
|
||||||
/*
|
/*
|
||||||
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
|
* Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
|
||||||
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
|
* -EOPNOTSUPP. For any such error, @num_bytes is not updated,
|
||||||
@@ -1337,8 +1336,8 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
stripe = bbio->stripes;
|
stripe = bioc->stripes;
|
||||||
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
|
for (i = 0; i < bioc->num_stripes; i++, stripe++) {
|
||||||
u64 bytes;
|
u64 bytes;
|
||||||
struct btrfs_device *device = stripe->dev;
|
struct btrfs_device *device = stripe->dev;
|
||||||
|
|
||||||
@@ -1361,7 +1360,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||||||
* And since there are two loops, explicitly
|
* And since there are two loops, explicitly
|
||||||
* go to out to avoid confusion.
|
* go to out to avoid confusion.
|
||||||
*/
|
*/
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bioc(bioc);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1372,7 +1371,7 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
|
|||||||
*/
|
*/
|
||||||
ret = 0;
|
ret = 0;
|
||||||
}
|
}
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bioc(bioc);
|
||||||
cur += num_bytes;
|
cur += num_bytes;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
@@ -1397,7 +1396,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
|||||||
ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
|
ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
|
||||||
generic_ref->action);
|
generic_ref->action);
|
||||||
BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
|
BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
|
||||||
generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);
|
generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
|
||||||
|
|
||||||
if (generic_ref->type == BTRFS_REF_METADATA)
|
if (generic_ref->type == BTRFS_REF_METADATA)
|
||||||
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
|
ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
|
||||||
@@ -2376,7 +2375,7 @@ int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
|
|||||||
|
|
||||||
out:
|
out:
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
|
if (btrfs_is_data_reloc_root(root))
|
||||||
WARN_ON(ret > 0);
|
WARN_ON(ret > 0);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@@ -2438,10 +2437,9 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|||||||
key.offset -= btrfs_file_extent_offset(buf, fi);
|
key.offset -= btrfs_file_extent_offset(buf, fi);
|
||||||
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
||||||
num_bytes, parent);
|
num_bytes, parent);
|
||||||
generic_ref.real_root = root->root_key.objectid;
|
|
||||||
btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
|
btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
|
||||||
key.offset);
|
key.offset, root->root_key.objectid,
|
||||||
generic_ref.skip_qgroup = for_reloc;
|
for_reloc);
|
||||||
if (inc)
|
if (inc)
|
||||||
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
||||||
else
|
else
|
||||||
@@ -2453,9 +2451,8 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
|
|||||||
num_bytes = fs_info->nodesize;
|
num_bytes = fs_info->nodesize;
|
||||||
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
btrfs_init_generic_ref(&generic_ref, action, bytenr,
|
||||||
num_bytes, parent);
|
num_bytes, parent);
|
||||||
generic_ref.real_root = root->root_key.objectid;
|
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
|
||||||
btrfs_init_tree_ref(&generic_ref, level - 1, ref_root);
|
root->root_key.objectid, for_reloc);
|
||||||
generic_ref.skip_qgroup = for_reloc;
|
|
||||||
if (inc)
|
if (inc)
|
||||||
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
ret = btrfs_inc_extent_ref(trans, &generic_ref);
|
||||||
else
|
else
|
||||||
@@ -3196,7 +3193,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 goto out;
 }
 
-ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0);
+ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
 if (ret) {
 btrfs_abort_transaction(trans, ret);
 goto out;
@@ -3289,7 +3286,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
 buf->start, buf->len, parent);
 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
-root->root_key.objectid);
+root->root_key.objectid, 0, false);
 
 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 btrfs_ref_tree_mod(fs_info, &generic_ref);
@@ -3373,9 +3370,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 * tree, just update pinning info and exit early.
 */
 if ((ref->type == BTRFS_REF_METADATA &&
-ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
 (ref->type == BTRFS_REF_DATA &&
-ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
+ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
 /* unlocks the pinned mutex */
 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
 ret = 0;
@@ -3386,9 +3383,9 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 }
 
 if (!((ref->type == BTRFS_REF_METADATA &&
-ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) ||
+ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
 (ref->type == BTRFS_REF_DATA &&
-ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)))
+ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
 btrfs_ref_tree_mod(fs_info, ref);
 
 return ret;
@@ -3476,7 +3473,9 @@ enum btrfs_extent_allocation_policy {
 */
 struct find_free_extent_ctl {
 /* Basic allocation info */
+u64 ram_bytes;
 u64 num_bytes;
+u64 min_alloc_size;
 u64 empty_size;
 u64 flags;
 int delalloc;
@@ -3495,6 +3494,9 @@ struct find_free_extent_ctl {
 /* Allocation is called for tree-log */
 bool for_treelog;
 
+/* Allocation is called for data relocation */
+bool for_data_reloc;
+
 /* RAID index, converted from flags */
 int index;
 
@@ -3756,8 +3758,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 u64 avail;
 u64 bytenr = block_group->start;
 u64 log_bytenr;
+u64 data_reloc_bytenr;
 int ret = 0;
-bool skip;
+bool skip = false;
 
 ASSERT(btrfs_is_zoned(block_group->fs_info));
 
@@ -3767,19 +3770,49 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 */
 spin_lock(&fs_info->treelog_bg_lock);
 log_bytenr = fs_info->treelog_bg;
-skip = log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
-(!ffe_ctl->for_treelog && bytenr == log_bytenr));
+if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
+(!ffe_ctl->for_treelog && bytenr == log_bytenr)))
+skip = true;
 spin_unlock(&fs_info->treelog_bg_lock);
 if (skip)
 return 1;
 
+/*
+ * Do not allow non-relocation blocks in the dedicated relocation block
+ * group, and vice versa.
+ */
+spin_lock(&fs_info->relocation_bg_lock);
+data_reloc_bytenr = fs_info->data_reloc_bg;
+if (data_reloc_bytenr &&
+((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
+(!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
+skip = true;
+spin_unlock(&fs_info->relocation_bg_lock);
+if (skip)
+return 1;
+/* Check RO and no space case before trying to activate it */
+spin_lock(&block_group->lock);
+if (block_group->ro ||
+block_group->alloc_offset == block_group->zone_capacity) {
+spin_unlock(&block_group->lock);
+return 1;
+}
+spin_unlock(&block_group->lock);
+
+if (!btrfs_zone_activate(block_group))
+return 1;
+
 spin_lock(&space_info->lock);
 spin_lock(&block_group->lock);
 spin_lock(&fs_info->treelog_bg_lock);
+spin_lock(&fs_info->relocation_bg_lock);
 
 ASSERT(!ffe_ctl->for_treelog ||
 block_group->start == fs_info->treelog_bg ||
 fs_info->treelog_bg == 0);
+ASSERT(!ffe_ctl->for_data_reloc ||
+block_group->start == fs_info->data_reloc_bg ||
+fs_info->data_reloc_bg == 0);
 
 if (block_group->ro) {
 ret = 1;
@@ -3796,7 +3829,18 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 goto out;
 }
 
-avail = block_group->length - block_group->alloc_offset;
+/*
+ * Do not allow currently used block group to be the data relocation
+ * dedicated block group.
+ */
+if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
+(block_group->used || block_group->reserved)) {
+ret = 1;
+goto out;
+}
+
+WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
+avail = block_group->zone_capacity - block_group->alloc_offset;
 if (avail < num_bytes) {
 if (ffe_ctl->max_extent_size < avail) {
 /*
@@ -3813,6 +3857,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
 fs_info->treelog_bg = block_group->start;
 
+if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
+fs_info->data_reloc_bg = block_group->start;
+
 ffe_ctl->found_offset = start + block_group->alloc_offset;
 block_group->alloc_offset += num_bytes;
 spin_lock(&ctl->tree_lock);
@@ -3829,6 +3876,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
 if (ret && ffe_ctl->for_treelog)
 fs_info->treelog_bg = 0;
+if (ret && ffe_ctl->for_data_reloc)
+fs_info->data_reloc_bg = 0;
+spin_unlock(&fs_info->relocation_bg_lock);
 spin_unlock(&fs_info->treelog_bg_lock);
 spin_unlock(&block_group->lock);
 spin_unlock(&space_info->lock);
@@ -3932,18 +3982,30 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
 ffe_ctl->orig_have_caching_bg = true;
 
-if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT &&
-ffe_ctl->have_caching_bg)
-return 1;
-
-if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES)
-return 1;
-
 if (ins->objectid) {
 found_extent(ffe_ctl, ins);
 return 0;
 }
 
+if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+!btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->index)) {
+/*
+ * If we have enough free space left in an already active block
+ * group and we can't activate any other zone now, retry the
+ * active ones with a smaller allocation size. Returning early
+ * from here will tell btrfs_reserve_extent() to haven the
+ * size.
+ */
+return -ENOSPC;
+}
+
+if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
+return 1;
+
+ffe_ctl->index++;
+if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
+return 1;
+
 /*
 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
 * caching kthreads as we move along
@@ -4085,6 +4147,12 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 ffe_ctl->hint_byte = fs_info->treelog_bg;
 spin_unlock(&fs_info->treelog_bg_lock);
 }
+if (ffe_ctl->for_data_reloc) {
+spin_lock(&fs_info->relocation_bg_lock);
+if (fs_info->data_reloc_bg)
+ffe_ctl->hint_byte = fs_info->data_reloc_bg;
+spin_unlock(&fs_info->relocation_bg_lock);
+}
 return 0;
 default:
 BUG();
@@ -4117,65 +4185,62 @@ static int prepare_allocation(struct btrfs_fs_info *fs_info,
 * |- If not found, re-iterate all block groups
 */
 static noinline int find_free_extent(struct btrfs_root *root,
-u64 ram_bytes, u64 num_bytes, u64 empty_size,
-u64 hint_byte_orig, struct btrfs_key *ins,
-u64 flags, int delalloc)
+struct btrfs_key *ins,
+struct find_free_extent_ctl *ffe_ctl)
 {
 struct btrfs_fs_info *fs_info = root->fs_info;
 int ret = 0;
 int cache_block_group_error = 0;
 struct btrfs_block_group *block_group = NULL;
-struct find_free_extent_ctl ffe_ctl = {0};
 struct btrfs_space_info *space_info;
 bool full_search = false;
-bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
 
-WARN_ON(num_bytes < fs_info->sectorsize);
+WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
 
-ffe_ctl.num_bytes = num_bytes;
-ffe_ctl.empty_size = empty_size;
-ffe_ctl.flags = flags;
-ffe_ctl.search_start = 0;
-ffe_ctl.delalloc = delalloc;
-ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
-ffe_ctl.have_caching_bg = false;
-ffe_ctl.orig_have_caching_bg = false;
-ffe_ctl.found_offset = 0;
-ffe_ctl.hint_byte = hint_byte_orig;
-ffe_ctl.for_treelog = for_treelog;
-ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
-
+ffe_ctl->search_start = 0;
 /* For clustered allocation */
-ffe_ctl.retry_clustered = false;
-ffe_ctl.retry_unclustered = false;
-ffe_ctl.last_ptr = NULL;
-ffe_ctl.use_cluster = true;
+ffe_ctl->empty_cluster = 0;
+ffe_ctl->last_ptr = NULL;
+ffe_ctl->use_cluster = true;
+ffe_ctl->have_caching_bg = false;
+ffe_ctl->orig_have_caching_bg = false;
+ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
+ffe_ctl->loop = 0;
+/* For clustered allocation */
+ffe_ctl->retry_clustered = false;
+ffe_ctl->retry_unclustered = false;
+ffe_ctl->cached = 0;
+ffe_ctl->max_extent_size = 0;
+ffe_ctl->total_free_space = 0;
+ffe_ctl->found_offset = 0;
+ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
 
 if (btrfs_is_zoned(fs_info))
-ffe_ctl.policy = BTRFS_EXTENT_ALLOC_ZONED;
+ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
 
 ins->type = BTRFS_EXTENT_ITEM_KEY;
 ins->objectid = 0;
 ins->offset = 0;
 
-trace_find_free_extent(root, num_bytes, empty_size, flags);
+trace_find_free_extent(root, ffe_ctl->num_bytes, ffe_ctl->empty_size,
+ffe_ctl->flags);
 
-space_info = btrfs_find_space_info(fs_info, flags);
+space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
 if (!space_info) {
-btrfs_err(fs_info, "No space info for %llu", flags);
+btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
 return -ENOSPC;
 }
 
-ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
+ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
 if (ret < 0)
 return ret;
 
-ffe_ctl.search_start = max(ffe_ctl.search_start,
+ffe_ctl->search_start = max(ffe_ctl->search_start,
 first_logical_byte(fs_info, 0));
-ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
-if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
+ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
+if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
 block_group = btrfs_lookup_block_group(fs_info,
-ffe_ctl.search_start);
+ffe_ctl->search_start);
 /*
 * we don't want to use the block group if it doesn't match our
 * allocation bits, or if its not cached.
@@ -4183,7 +4248,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 * However if we are re-searching with an ideal block group
 * picked out then we don't care that the block group is cached.
 */
-if (block_group && block_group_bits(block_group, flags) &&
+if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
 block_group->cached != BTRFS_CACHE_NO) {
 down_read(&space_info->groups_sem);
 if (list_empty(&block_group->list) ||
@@ -4197,9 +4262,10 @@ static noinline int find_free_extent(struct btrfs_root *root,
 btrfs_put_block_group(block_group);
 up_read(&space_info->groups_sem);
 } else {
-ffe_ctl.index = btrfs_bg_flags_to_raid_index(
+ffe_ctl->index = btrfs_bg_flags_to_raid_index(
 block_group->flags);
-btrfs_lock_block_group(block_group, delalloc);
+btrfs_lock_block_group(block_group,
+ffe_ctl->delalloc);
 goto have_block_group;
 }
 } else if (block_group) {
@@ -4207,31 +4273,33 @@ static noinline int find_free_extent(struct btrfs_root *root,
 }
 }
 search:
-ffe_ctl.have_caching_bg = false;
-if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) ||
-ffe_ctl.index == 0)
+ffe_ctl->have_caching_bg = false;
+if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
+ffe_ctl->index == 0)
 full_search = true;
 down_read(&space_info->groups_sem);
 list_for_each_entry(block_group,
-&space_info->block_groups[ffe_ctl.index], list) {
+&space_info->block_groups[ffe_ctl->index], list) {
 struct btrfs_block_group *bg_ret;
 
 /* If the block group is read-only, we can skip it entirely. */
 if (unlikely(block_group->ro)) {
-if (for_treelog)
+if (ffe_ctl->for_treelog)
 btrfs_clear_treelog_bg(block_group);
+if (ffe_ctl->for_data_reloc)
+btrfs_clear_data_reloc_bg(block_group);
 continue;
 }
 
-btrfs_grab_block_group(block_group, delalloc);
-ffe_ctl.search_start = block_group->start;
+btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
+ffe_ctl->search_start = block_group->start;
 
 /*
 * this can happen if we end up cycling through all the
 * raid types, but we want to make sure we only allocate
 * for the proper type.
 */
-if (!block_group_bits(block_group, flags)) {
+if (!block_group_bits(block_group, ffe_ctl->flags)) {
 u64 extra = BTRFS_BLOCK_GROUP_DUP |
 BTRFS_BLOCK_GROUP_RAID1_MASK |
 BTRFS_BLOCK_GROUP_RAID56_MASK |
@@ -4242,7 +4310,7 @@ search:
 * doesn't provide them, bail. This does allow us to
 * fill raid0 from raid1.
 */
-if ((flags & extra) && !(block_group->flags & extra))
+if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
 goto loop;
 
 /*
@@ -4250,14 +4318,14 @@ search:
 * It's possible that we have MIXED_GROUP flag but no
 * block group is mixed. Just skip such block group.
 */
-btrfs_release_block_group(block_group, delalloc);
+btrfs_release_block_group(block_group, ffe_ctl->delalloc);
 continue;
 }
 
 have_block_group:
-ffe_ctl.cached = btrfs_block_group_done(block_group);
-if (unlikely(!ffe_ctl.cached)) {
-ffe_ctl.have_caching_bg = true;
+ffe_ctl->cached = btrfs_block_group_done(block_group);
+if (unlikely(!ffe_ctl->cached)) {
+ffe_ctl->have_caching_bg = true;
 ret = btrfs_cache_block_group(block_group, 0);
 
 /*
@@ -4280,10 +4348,11 @@ have_block_group:
 goto loop;
 
 bg_ret = NULL;
-ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
+ret = do_allocation(block_group, ffe_ctl, &bg_ret);
 if (ret == 0) {
 if (bg_ret && bg_ret != block_group) {
-btrfs_release_block_group(block_group, delalloc);
+btrfs_release_block_group(block_group,
+ffe_ctl->delalloc);
 block_group = bg_ret;
 }
 } else if (ret == -EAGAIN) {
@@ -4293,46 +4362,49 @@ have_block_group:
 }
 
 /* Checks */
-ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
+ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
 fs_info->stripesize);
 
 /* move on to the next group */
-if (ffe_ctl.search_start + num_bytes >
+if (ffe_ctl->search_start + ffe_ctl->num_bytes >
 block_group->start + block_group->length) {
 btrfs_add_free_space_unused(block_group,
-ffe_ctl.found_offset, num_bytes);
+ffe_ctl->found_offset,
+ffe_ctl->num_bytes);
 goto loop;
 }
 
-if (ffe_ctl.found_offset < ffe_ctl.search_start)
+if (ffe_ctl->found_offset < ffe_ctl->search_start)
 btrfs_add_free_space_unused(block_group,
-ffe_ctl.found_offset,
-ffe_ctl.search_start - ffe_ctl.found_offset);
+ffe_ctl->found_offset,
+ffe_ctl->search_start - ffe_ctl->found_offset);
 
-ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
-num_bytes, delalloc);
+ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
+ffe_ctl->num_bytes,
+ffe_ctl->delalloc);
 if (ret == -EAGAIN) {
 btrfs_add_free_space_unused(block_group,
-ffe_ctl.found_offset, num_bytes);
+ffe_ctl->found_offset,
+ffe_ctl->num_bytes);
 goto loop;
 }
 btrfs_inc_block_group_reservations(block_group);
 
 /* we are all good, lets return */
-ins->objectid = ffe_ctl.search_start;
-ins->offset = num_bytes;
+ins->objectid = ffe_ctl->search_start;
+ins->offset = ffe_ctl->num_bytes;
 
-trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start,
-num_bytes);
-btrfs_release_block_group(block_group, delalloc);
+trace_btrfs_reserve_extent(block_group, ffe_ctl->search_start,
+ffe_ctl->num_bytes);
+btrfs_release_block_group(block_group, ffe_ctl->delalloc);
 break;
 loop:
-release_block_group(block_group, &ffe_ctl, delalloc);
+release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
 cond_resched();
 }
 up_read(&space_info->groups_sem);
 
-ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
+ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
 if (ret > 0)
 goto search;
 
@@ -4341,12 +4413,12 @@ loop:
 * Use ffe_ctl->total_free_space as fallback if we can't find
 * any contiguous hole.
 */
-if (!ffe_ctl.max_extent_size)
-ffe_ctl.max_extent_size = ffe_ctl.total_free_space;
+if (!ffe_ctl->max_extent_size)
+ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
 spin_lock(&space_info->lock);
-space_info->max_extent_size = ffe_ctl.max_extent_size;
+space_info->max_extent_size = ffe_ctl->max_extent_size;
 spin_unlock(&space_info->lock);
-ins->offset = ffe_ctl.max_extent_size;
+ins->offset = ffe_ctl->max_extent_size;
 } else if (ret == -ENOSPC) {
 ret = cache_block_group_error;
 }
@@ -4404,16 +4476,28 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 struct btrfs_key *ins, int is_data, int delalloc)
 {
 struct btrfs_fs_info *fs_info = root->fs_info;
+struct find_free_extent_ctl ffe_ctl = {};
 bool final_tried = num_bytes == min_alloc_size;
 u64 flags;
 int ret;
 bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
+bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
 
 flags = get_alloc_profile_by_root(root, is_data);
 again:
 WARN_ON(num_bytes < fs_info->sectorsize);
-ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
-hint_byte, ins, flags, delalloc);
+ffe_ctl.ram_bytes = ram_bytes;
+ffe_ctl.num_bytes = num_bytes;
+ffe_ctl.min_alloc_size = min_alloc_size;
+ffe_ctl.empty_size = empty_size;
+ffe_ctl.flags = flags;
+ffe_ctl.delalloc = delalloc;
+ffe_ctl.hint_byte = hint_byte;
+ffe_ctl.for_treelog = for_treelog;
+ffe_ctl.for_data_reloc = for_data_reloc;
+
+ret = find_free_extent(root, ins, &ffe_ctl);
 if (!ret && !is_data) {
 btrfs_dec_block_group_reservations(fs_info, ins->objectid);
 } else if (ret == -ENOSPC) {
@@ -4431,8 +4515,8 @@ again:
 
 sinfo = btrfs_find_space_info(fs_info, flags);
 btrfs_err(fs_info,
-"allocation failed flags %llu, wanted %llu tree-log %d",
-flags, num_bytes, for_treelog);
+"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
+flags, num_bytes, for_treelog, for_data_reloc);
 if (sinfo)
 btrfs_dump_space_info(fs_info, sinfo,
 num_bytes, 1);
@@ -4543,7 +4627,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 if (ret)
 return ret;
 
-ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1);
+ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
 if (ret) { /* -ENOENT, logic error */
 btrfs_err(fs_info, "update block group failed for %llu %llu",
 ins->objectid, ins->offset);
@@ -4632,7 +4716,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 return ret;
 
 ret = btrfs_update_block_group(trans, extent_key.objectid,
-fs_info->nodesize, 1);
+fs_info->nodesize, true);
 if (ret) { /* -ENOENT, logic error */
 btrfs_err(fs_info, "update block group failed for %llu %llu",
 extent_key.objectid, extent_key.offset);
@@ -4655,7 +4739,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 
 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
 ins->objectid, ins->offset, 0);
-btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset);
+btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
+offset, 0, false);
 btrfs_ref_tree_mod(root->fs_info, &generic_ref);
 
 return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
@@ -4847,8 +4932,8 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 
 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
 ins.objectid, ins.offset, parent);
-generic_ref.real_root = root->root_key.objectid;
-btrfs_init_tree_ref(&generic_ref, level, root_objectid);
+btrfs_init_tree_ref(&generic_ref, level, root_objectid,
+root->root_key.objectid, false);
 btrfs_ref_tree_mod(fs_info, &generic_ref);
 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
 if (ret)
@@ -5265,7 +5350,8 @@ skip:
 
 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
 fs_info->nodesize, parent);
-btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid);
+btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
+0, false);
 ret = btrfs_free_extent(trans, &ref);
 if (ret)
 goto out_unlock;
@@ -5750,13 +5836,13 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
 return -ENOMEM;
 }
 
-btrfs_assert_tree_locked(parent);
+btrfs_assert_tree_write_locked(parent);
 parent_level = btrfs_header_level(parent);
 atomic_inc(&parent->refs);
 path->nodes[parent_level] = parent;
 path->slots[parent_level] = btrfs_header_nritems(parent);
 
-btrfs_assert_tree_locked(node);
+btrfs_assert_tree_write_locked(node);
 level = btrfs_header_level(node);
 path->nodes[level] = node;
 path->slots[level] = 0;
@@ -241,7 +241,7 @@ int __init extent_io_init(void)
 return -ENOMEM;
 
 if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
-offsetof(struct btrfs_io_bio, bio),
+offsetof(struct btrfs_bio, bio),
 BIOSET_NEED_BVECS))
 goto free_buffer_cache;
 
@@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 
 /*
 * Find and lock a contiguous range of bytes in the file marked as delalloc, no
-* more than @max_bytes. @Start and @end are used to return the range,
+* more than @max_bytes.
 *
-* Return: true if we find something
-* false if nothing was in the tree
+* @start: The original start bytenr to search.
+* Will store the extent range start bytenr.
+* @end: The original end bytenr of the search range
+* Will store the extent range end bytenr.
+*
+* Return true if we find a delalloc range which starts inside the original
+* range, and @start/@end will store the delalloc range start/end.
+*
+* Return false if we can't find any delalloc range which starts inside the
+* original range, and @start/@end will be the non-delalloc range start/end.
 */
 EXPORT_FOR_TESTS
 noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
@@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 u64 *end)
 {
 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+const u64 orig_start = *start;
+const u64 orig_end = *end;
 u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
 u64 delalloc_start;
 u64 delalloc_end;
@@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
 int ret;
 int loops = 0;
 
+/* Caller should pass a valid @end to indicate the search range end */
+ASSERT(orig_end > orig_start);
+
+/* The range should at least cover part of the page */
+ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
+orig_end <= page_offset(locked_page)));
 again:
 /* step one, find a bunch of delalloc bytes starting at start */
 delalloc_start = *start;
 delalloc_end = 0;
 found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
 max_bytes, &cached_state);
-if (!found || delalloc_end <= *start) {
+if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
 *start = delalloc_start;
-*end = delalloc_end;
+
+/* @delalloc_end can be -1, never go beyond @orig_end */
+*end = min(delalloc_end, orig_end);
 free_extent_state(cached_state);
 return false;
 }
@@ -2282,7 +2300,7 @@ int free_io_failure(struct extent_io_tree *failure_tree,
 * currently, there can be no more than two copies of every data bit. thus,
 * exactly one rewrite is required.
 */
-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
+static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 u64 length, u64 logical, struct page *page,
 unsigned int pg_offset, int mirror_num)
 {
@@ -2290,7 +2308,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 struct btrfs_device *dev;
 u64 map_length = 0;
 u64 sector;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
 int ret;
 
 ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
@@ -2299,12 +2317,12 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 if (btrfs_is_zoned(fs_info))
 return btrfs_repair_one_zone(fs_info, logical);
 
-bio = btrfs_io_bio_alloc(1);
+bio = btrfs_bio_alloc(1);
 bio->bi_iter.bi_size = 0;
 map_length = length;
 
 /*
-* Avoid races with device replace and make sure our bbio has devices
+* Avoid races with device replace and make sure our bioc has devices
 * associated to its stripes that don't go away while we are doing the
 * read repair operation.
 */
@@ -2317,28 +2335,28 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
 * stripe's dev and sector.
 */
 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
-&map_length, &bbio, 0);
+&map_length, &bioc, 0);
 if (ret) {
 btrfs_bio_counter_dec(fs_info);
 bio_put(bio);
 return -EIO;
 }
-ASSERT(bbio->mirror_num == 1);
+ASSERT(bioc->mirror_num == 1);
 } else {
 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-&map_length, &bbio, mirror_num);
+&map_length, &bioc, mirror_num);
 if (ret) {
 btrfs_bio_counter_dec(fs_info);
 bio_put(bio);
 return -EIO;
 }
-BUG_ON(mirror_num != bbio->mirror_num);
+BUG_ON(mirror_num != bioc->mirror_num);
 }
 
-sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9;
+sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
 bio->bi_iter.bi_sector = sector;
-dev = bbio->stripes[bbio->mirror_num - 1].dev;
-btrfs_put_bbio(bbio);
+dev = bioc->stripes[bioc->mirror_num - 1].dev;
+btrfs_put_bioc(bioc);
 if (!dev || !dev->bdev ||
 !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
 btrfs_bio_counter_dec(fs_info);
@@ -2618,10 +2636,10 @@ int btrfs_repair_one_sector(struct inode *inode,
 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
+struct btrfs_bio *failed_bbio = btrfs_bio(failed_bio);
 const int icsum = bio_offset >> fs_info->sectorsize_bits;
 struct bio *repair_bio;
-struct btrfs_io_bio *repair_io_bio;
+struct btrfs_bio *repair_bbio;
 blk_status_t status;
 
 btrfs_debug(fs_info,
@@ -2639,24 +2657,23 @@ int btrfs_repair_one_sector(struct inode *inode,
 return -EIO;
 }
 
-repair_bio = btrfs_io_bio_alloc(1);
-repair_io_bio = btrfs_io_bio(repair_bio);
+repair_bio = btrfs_bio_alloc(1);
+repair_bbio = btrfs_bio(repair_bio);
 repair_bio->bi_opf = REQ_OP_READ;
 repair_bio->bi_end_io = failed_bio->bi_end_io;
 repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
 repair_bio->bi_private = failed_bio->bi_private;
 
-if (failed_io_bio->csum) {
+if (failed_bbio->csum) {
 const u32 csum_size = fs_info->csum_size;
 
-repair_io_bio->csum = repair_io_bio->csum_inline;
-memcpy(repair_io_bio->csum,
-failed_io_bio->csum + csum_size * icsum, csum_size);
+repair_bbio->csum = repair_bbio->csum_inline;
+memcpy(repair_bbio->csum,
+failed_bbio->csum + csum_size * icsum, csum_size);
 }
 
 bio_add_page(repair_bio, page, failrec->len, pgoff);
-repair_io_bio->logical = failrec->start;
-repair_io_bio->iter = repair_bio->bi_iter;
+repair_bbio->iter = repair_bio->bi_iter;
 
 btrfs_debug(btrfs_sb(inode->i_sb),
 "repair read error: submitting new read to mirror %d",
@@ -2976,7 +2993,7 @@ static struct extent_buffer *find_extent_buffer_readpage(
 static void end_bio_extent_readpage(struct bio *bio)
 {
 struct bio_vec *bvec;
-struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+struct btrfs_bio *bbio = btrfs_bio(bio);
 struct extent_io_tree *tree, *failure_tree;
 struct processed_extent processed = { 0 };
 /*
@@ -3003,7 +3020,7 @@ static void end_bio_extent_readpage(struct bio *bio)
 btrfs_debug(fs_info,
 "end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
 bio->bi_iter.bi_sector, bio->bi_status,
-io_bio->mirror_num);
+bbio->mirror_num);
 tree = &BTRFS_I(inode)->io_tree;
 failure_tree = &BTRFS_I(inode)->io_failure_tree;
 
@@ -3028,14 +3045,14 @@ static void end_bio_extent_readpage(struct bio *bio)
 end = start + bvec->bv_len - 1;
 len = bvec->bv_len;
 
-mirror = io_bio->mirror_num;
+mirror = bbio->mirror_num;
 if (likely(uptodate)) {
 if (is_data_inode(inode)) {
-error_bitmap = btrfs_verify_data_csum(io_bio,
+error_bitmap = btrfs_verify_data_csum(bbio,
 bio_offset, page, start, end);
 ret = error_bitmap;
 } else {
-ret = btrfs_validate_metadata_buffer(io_bio,
+ret = btrfs_validate_metadata_buffer(bbio,
 page, start, end, mirror);
 }
 if (ret)
@@ -3106,7 +3123,7 @@ readpage_ok:
 }
 /* Release the last extent */
 endio_readpage_release_extent(&processed, NULL, 0, 0, false);
-btrfs_io_bio_free_csum(io_bio);
+btrfs_bio_free_csum(bbio);
 bio_put(bio);
 }
 
@@ -3115,53 +3132,43 @@ readpage_ok:
 * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
 * 'bio' because use of __GFP_ZERO is not supported.
 */
-static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
+static inline void btrfs_bio_init(struct btrfs_bio *bbio)
 {
-memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
+memset(bbio, 0, offsetof(struct btrfs_bio, bio));
 }
 
 /*
-* The following helpers allocate a bio. As it's backed by a bioset, it'll
-* never fail. We're returning a bio right now but you can call btrfs_io_bio
-* for the appropriate container_of magic
+* Allocate a btrfs_io_bio, with @nr_iovecs as maximum number of iovecs.
+*
+* The bio allocation is backed by bioset and does not fail.
 */
-struct bio *btrfs_bio_alloc(u64 first_byte)
+struct bio *btrfs_bio_alloc(unsigned int nr_iovecs)
 {
 struct bio *bio;
 
-bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
-bio->bi_iter.bi_sector = first_byte >> 9;
-btrfs_io_bio_init(btrfs_io_bio(bio));
+ASSERT(0 < nr_iovecs && nr_iovecs <= BIO_MAX_VECS);
+bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
+btrfs_bio_init(btrfs_bio(bio));
 return bio;
 }
 
 struct bio *btrfs_bio_clone(struct bio *bio)
 {
-struct btrfs_io_bio *btrfs_bio;
+struct btrfs_bio *bbio;
 struct bio *new;
 
 /* Bio allocation backed by a bioset does not fail */
 new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
-btrfs_bio = btrfs_io_bio(new);
-btrfs_io_bio_init(btrfs_bio);
-btrfs_bio->iter = bio->bi_iter;
+bbio = btrfs_bio(new);
+btrfs_bio_init(bbio);
+bbio->iter = bio->bi_iter;
 return new;
 }
 
-struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
-{
-struct bio *bio;
-
-/* Bio allocation backed by a bioset does not fail */
-bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
-btrfs_io_bio_init(btrfs_io_bio(bio));
-return bio;
-}
-
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 {
 struct bio *bio;
-struct btrfs_io_bio *btrfs_bio;
+struct btrfs_bio *bbio;
 
 ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
 
@@ -3169,11 +3176,11 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size)
 bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
 ASSERT(bio);
 
-btrfs_bio = btrfs_io_bio(bio);
-btrfs_io_bio_init(btrfs_bio);
+bbio = btrfs_bio(bio);
+btrfs_bio_init(bbio);
 
 bio_trim(bio, offset >> 9, size >> 9);
-btrfs_bio->iter = bio->bi_iter;
+bbio->iter = bio->bi_iter;
 return bio;
 }
 
@@ -3307,14 +3314,15 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 struct bio *bio;
 int ret;
 
+bio = btrfs_bio_alloc(BIO_MAX_VECS);
 /*
 * For compressed page range, its disk_bytenr is always @disk_bytenr
 * passed in, no matter if we have added any range into previous bio.
 */
 if (bio_flags & EXTENT_BIO_COMPRESSED)
-bio = btrfs_bio_alloc(disk_bytenr);
+bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
 else
-bio = btrfs_bio_alloc(disk_bytenr + offset);
+bio->bi_iter.bi_sector = (disk_bytenr + offset) >> SECTOR_SHIFT;
 bio_ctrl->bio = bio;
 bio_ctrl->bio_flags = bio_flags;
 bio->bi_end_io = end_io_func;
@@ -3327,7 +3335,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 if (wbc) {
 struct block_device *bdev;
 
-bdev = fs_info->fs_devices->latest_bdev;
+bdev = fs_info->fs_devices->latest_dev->bdev;
 bio_set_dev(bio, bdev);
 wbc_init_bio(wbc, bio);
 }
@@ -3341,7 +3349,7 @@ static int alloc_new_bio(struct btrfs_inode *inode,
 goto error;
 }
 
-btrfs_io_bio(bio)->device = device;
+btrfs_bio(bio)->device = device;
 }
 return 0;
 error:
@@ -3599,6 +3607,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 bool force_bio_submit = false;
 u64 disk_bytenr;
 
+ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 if (cur >= last_byte) {
 struct extent_state *cached = NULL;
 
@@ -3777,17 +3786,18 @@ static void update_nr_written(struct writeback_control *wbc,
 */
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 struct page *page, struct writeback_control *wbc,
-u64 delalloc_start, unsigned long *nr_written)
+unsigned long *nr_written)
 {
-u64 page_end = delalloc_start + PAGE_SIZE - 1;
-bool found;
+const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
+u64 delalloc_start = page_offset(page);
 u64 delalloc_to_write = 0;
-u64 delalloc_end = 0;
 int ret;
 int page_started = 0;
 
+while (delalloc_start < page_end) {
+u64 delalloc_end = page_end;
+bool found;
 
-while (delalloc_end < page_end) {
 found = find_lock_delalloc_range(&inode->vfs_inode, page,
 &delalloc_start,
 &delalloc_end);
@@ -3854,12 +3864,11 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 struct page *page, u64 *start, u64 *end)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+struct btrfs_subpage_info *spi = fs_info->subpage_info;
 u64 orig_start = *start;
 /* Declare as unsigned long so we can use bitmap ops */
-unsigned long dirty_bitmap;
 unsigned long flags;
-int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
-int range_start_bit = nbits;
+int range_start_bit;
 int range_end_bit;
 
 /*
@@ -3872,13 +3881,18 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 return;
 }
 
+range_start_bit = spi->dirty_offset +
+(offset_in_page(orig_start) >> fs_info->sectorsize_bits);
+
 /* We should have the page locked, but just in case */
 spin_lock_irqsave(&subpage->lock, flags);
-dirty_bitmap = subpage->dirty_bitmap;
+bitmap_next_set_region(subpage->bitmaps, &range_start_bit, &range_end_bit,
+spi->dirty_offset + spi->bitmap_nr_bits);
 spin_unlock_irqrestore(&subpage->lock, flags);
 
-bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
-BTRFS_SUBPAGE_BITMAP_SIZE);
+range_start_bit -= spi->dirty_offset;
+range_end_bit -= spi->dirty_offset;
+
 *start = page_offset(page) + range_start_bit * fs_info->sectorsize;
 *end = page_offset(page) + range_end_bit * fs_info->sectorsize;
 }
@@ -4054,8 +4068,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 struct extent_page_data *epd)
 {
 struct inode *inode = page->mapping->host;
-u64 start = page_offset(page);
-u64 page_end = start + PAGE_SIZE - 1;
+struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+const u64 page_start = page_offset(page);
+const u64 page_end = page_start + PAGE_SIZE - 1;
 int ret;
 int nr = 0;
 size_t pg_offset;
@@ -4090,8 +4105,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 }
 
 if (!epd->extent_locked) {
-ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
-&nr_written);
+ret = writepage_delalloc(BTRFS_I(inode), page, wbc, &nr_written);
 if (ret == 1)
 return 0;
 if (ret)
@@ -4141,8 +4155,20 @@ done:
 * capable of that.
 */
 if (PageError(page))
-end_extent_writepage(page, ret, start, page_end);
+end_extent_writepage(page, ret, page_start, page_end);
+if (epd->extent_locked) {
+/*
+ * If epd->extent_locked, it's from extent_write_locked_range(),
+ * the page can either be locked by lock_page() or
+ * process_one_page().
+ * Let btrfs_page_unlock_writer() handle both cases.
+ */
+ASSERT(wbc);
+btrfs_page_unlock_writer(fs_info, page, wbc->range_start,
+wbc->range_end + 1 - wbc->range_start);
+} else {
 unlock_page(page);
+}
 ASSERT(ret <= 0);
 return ret;
 }
@@ -4155,6 +4181,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
 clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 smp_mb__after_atomic();
 wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4602,12 +4631,11 @@ static int submit_eb_subpage(struct page *page,
 int submitted = 0;
 u64 page_start = page_offset(page);
 int bit_start = 0;
-const int nbits = BTRFS_SUBPAGE_BITMAP_SIZE;
 int sectors_per_node = fs_info->nodesize >> fs_info->sectorsize_bits;
 int ret;
 
 /* Lock and write each dirty extent buffers in the range */
-while (bit_start < nbits) {
+while (bit_start < fs_info->subpage_info->bitmap_nr_bits) {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 struct extent_buffer *eb;
 unsigned long flags;
@@ -4623,7 +4651,8 @@ static int submit_eb_subpage(struct page *page,
 break;
 }
 spin_lock_irqsave(&subpage->lock, flags);
-if (!((1 << bit_start) & subpage->dirty_bitmap)) {
+if (!test_bit(bit_start + fs_info->subpage_info->dirty_offset,
+subpage->bitmaps)) {
 spin_unlock_irqrestore(&subpage->lock, flags);
 spin_unlock(&page->mapping->private_lock);
 bit_start++;
@@ -4756,8 +4785,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 free_extent_buffer(eb);
 return ret;
 }
-if (cache)
+if (cache) {
+/* Impiles write in zoned mode */
 btrfs_put_block_group(cache);
+
+/* Mark the last eb in a block group */
+if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+}
 ret = write_one_eb(eb, wbc, epd);
 free_extent_buffer(eb);
 if (ret < 0)
@@ -4873,7 +4907,7 @@ retry:
|
|||||||
* extent io tree. Thus we don't want to submit such wild eb
|
* extent io tree. Thus we don't want to submit such wild eb
|
||||||
* if the fs already has error.
|
* if the fs already has error.
|
||||||
*/
|
*/
|
||||||
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
if (!BTRFS_FS_ERROR(fs_info)) {
|
||||||
ret = flush_write_bio(&epd);
|
ret = flush_write_bio(&epd);
|
||||||
} else {
|
} else {
|
||||||
ret = -EROFS;
|
ret = -EROFS;
|
||||||
@@ -5069,23 +5103,28 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
/*
|
||||||
int mode)
|
* Submit the pages in the range to bio for call sites which delalloc range has
|
||||||
|
* already been ran (aka, ordered extent inserted) and all pages are still
|
||||||
|
* locked.
|
||||||
|
*/
|
||||||
|
int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
|
||||||
{
|
{
|
||||||
|
bool found_error = false;
|
||||||
|
int first_error = 0;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct address_space *mapping = inode->i_mapping;
|
struct address_space *mapping = inode->i_mapping;
|
||||||
struct page *page;
|
struct page *page;
|
||||||
unsigned long nr_pages = (end - start + PAGE_SIZE) >>
|
u64 cur = start;
|
||||||
PAGE_SHIFT;
|
unsigned long nr_pages;
|
||||||
|
const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
|
||||||
struct extent_page_data epd = {
|
struct extent_page_data epd = {
|
||||||
.bio_ctrl = { 0 },
|
.bio_ctrl = { 0 },
|
||||||
.extent_locked = 1,
|
.extent_locked = 1,
|
||||||
.sync_io = mode == WB_SYNC_ALL,
|
.sync_io = 1,
|
||||||
};
|
};
|
||||||
struct writeback_control wbc_writepages = {
|
struct writeback_control wbc_writepages = {
|
||||||
.sync_mode = mode,
|
.sync_mode = WB_SYNC_ALL,
|
||||||
.nr_to_write = nr_pages * 2,
|
|
||||||
.range_start = start,
|
.range_start = start,
|
||||||
.range_end = end + 1,
|
.range_end = end + 1,
|
||||||
/* We're called from an async helper function */
|
/* We're called from an async helper function */
|
||||||
@@ -5093,33 +5132,51 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
|||||||
.no_cgroup_owner = 1,
|
.no_cgroup_owner = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ASSERT(IS_ALIGNED(start, sectorsize) && IS_ALIGNED(end + 1, sectorsize));
|
||||||
|
nr_pages = (round_up(end, PAGE_SIZE) - round_down(start, PAGE_SIZE)) >>
|
||||||
|
PAGE_SHIFT;
|
||||||
|
wbc_writepages.nr_to_write = nr_pages * 2;
|
||||||
|
|
||||||
wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
|
wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
|
||||||
while (start <= end) {
|
while (cur <= end) {
|
||||||
page = find_get_page(mapping, start >> PAGE_SHIFT);
|
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
|
||||||
if (clear_page_dirty_for_io(page))
|
|
||||||
|
page = find_get_page(mapping, cur >> PAGE_SHIFT);
|
||||||
|
/*
|
||||||
|
* All pages in the range are locked since
|
||||||
|
* btrfs_run_delalloc_range(), thus there is no way to clear
|
||||||
|
* the page dirty flag.
|
||||||
|
*/
|
||||||
|
ASSERT(PageLocked(page));
|
||||||
|
ASSERT(PageDirty(page));
|
||||||
|
clear_page_dirty_for_io(page);
|
||||||
ret = __extent_writepage(page, &wbc_writepages, &epd);
|
ret = __extent_writepage(page, &wbc_writepages, &epd);
|
||||||
else {
|
ASSERT(ret <= 0);
|
||||||
btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
|
if (ret < 0) {
|
||||||
page, start, start + PAGE_SIZE - 1, true);
|
found_error = true;
|
||||||
unlock_page(page);
|
first_error = ret;
|
||||||
}
|
}
|
||||||
put_page(page);
|
put_page(page);
|
||||||
start += PAGE_SIZE;
|
cur = cur_end + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
ASSERT(ret <= 0);
|
if (!found_error)
|
||||||
if (ret == 0)
|
|
||||||
ret = flush_write_bio(&epd);
|
ret = flush_write_bio(&epd);
|
||||||
else
|
else
|
||||||
end_write_bio(&epd, ret);
|
end_write_bio(&epd, ret);
|
||||||
|
|
||||||
wbc_detach_inode(&wbc_writepages);
|
wbc_detach_inode(&wbc_writepages);
|
||||||
|
if (found_error)
|
||||||
|
return first_error;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int extent_writepages(struct address_space *mapping,
|
int extent_writepages(struct address_space *mapping,
|
||||||
struct writeback_control *wbc)
|
struct writeback_control *wbc)
|
||||||
{
|
{
|
||||||
|
struct inode *inode = mapping->host;
|
||||||
|
const bool data_reloc = btrfs_is_data_reloc_root(BTRFS_I(inode)->root);
|
||||||
|
const bool zoned = btrfs_is_zoned(BTRFS_I(inode)->root->fs_info);
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
struct extent_page_data epd = {
|
struct extent_page_data epd = {
|
||||||
.bio_ctrl = { 0 },
|
.bio_ctrl = { 0 },
|
||||||
@@ -5127,7 +5184,15 @@ int extent_writepages(struct address_space *mapping,
|
|||||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow only a single thread to do the reloc work in zoned mode to
|
||||||
|
* protect the write pointer updates.
|
||||||
|
*/
|
||||||
|
if (data_reloc && zoned)
|
||||||
|
btrfs_inode_lock(inode, 0);
|
||||||
ret = extent_write_cache_pages(mapping, wbc, &epd);
|
ret = extent_write_cache_pages(mapping, wbc, &epd);
|
||||||
|
if (data_reloc && zoned)
|
||||||
|
btrfs_inode_unlock(inode, 0);
|
||||||
ASSERT(ret <= 0);
|
ASSERT(ret <= 0);
|
||||||
if (ret < 0) {
|
if (ret < 0) {
|
||||||
end_write_bio(&epd, ret);
|
end_write_bio(&epd, ret);
|
||||||
@@ -6137,14 +6202,16 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
|
|||||||
* page, but it may change in the future for 16K page size
|
* page, but it may change in the future for 16K page size
|
||||||
* support, so we still preallocate the memory in the loop.
|
* support, so we still preallocate the memory in the loop.
|
||||||
*/
|
*/
|
||||||
ret = btrfs_alloc_subpage(fs_info, &prealloc,
|
if (fs_info->sectorsize < PAGE_SIZE) {
|
||||||
BTRFS_SUBPAGE_METADATA);
|
prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
|
||||||
if (ret < 0) {
|
if (IS_ERR(prealloc)) {
|
||||||
|
ret = PTR_ERR(prealloc);
|
||||||
unlock_page(p);
|
unlock_page(p);
|
||||||
put_page(p);
|
put_page(p);
|
||||||
exists = ERR_PTR(ret);
|
exists = ERR_PTR(ret);
|
||||||
goto free_eb;
|
goto free_eb;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
spin_lock(&mapping->private_lock);
|
spin_lock(&mapping->private_lock);
|
||||||
exists = grab_extent_buffer(fs_info, p);
|
exists = grab_extent_buffer(fs_info, p);
|
||||||
@@ -7167,32 +7234,41 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define GANG_LOOKUP_SIZE 16
|
||||||
static struct extent_buffer *get_next_extent_buffer(
|
static struct extent_buffer *get_next_extent_buffer(
|
||||||
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
|
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
|
||||||
{
|
{
|
||||||
struct extent_buffer *gang[BTRFS_SUBPAGE_BITMAP_SIZE];
|
struct extent_buffer *gang[GANG_LOOKUP_SIZE];
|
||||||
struct extent_buffer *found = NULL;
|
struct extent_buffer *found = NULL;
|
||||||
u64 page_start = page_offset(page);
|
u64 page_start = page_offset(page);
|
||||||
|
u64 cur = page_start;
|
||||||
|
|
||||||
|
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
|
||||||
|
lockdep_assert_held(&fs_info->buffer_lock);
|
||||||
|
|
||||||
|
while (cur < page_start + PAGE_SIZE) {
|
||||||
int ret;
|
int ret;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
|
ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
|
||||||
ASSERT(PAGE_SIZE / fs_info->nodesize <= BTRFS_SUBPAGE_BITMAP_SIZE);
|
(void **)gang, cur >> fs_info->sectorsize_bits,
|
||||||
lockdep_assert_held(&fs_info->buffer_lock);
|
min_t(unsigned int, GANG_LOOKUP_SIZE,
|
||||||
|
PAGE_SIZE / fs_info->nodesize));
|
||||||
ret = radix_tree_gang_lookup(&fs_info->buffer_radix, (void **)gang,
|
if (ret == 0)
|
||||||
bytenr >> fs_info->sectorsize_bits,
|
goto out;
|
||||||
PAGE_SIZE / fs_info->nodesize);
|
|
||||||
for (i = 0; i < ret; i++) {
|
for (i = 0; i < ret; i++) {
|
||||||
/* Already beyond page end */
|
/* Already beyond page end */
|
||||||
if (gang[i]->start >= page_start + PAGE_SIZE)
|
if (gang[i]->start >= page_start + PAGE_SIZE)
|
||||||
break;
|
goto out;
|
||||||
/* Found one */
|
/* Found one */
|
||||||
if (gang[i]->start >= bytenr) {
|
if (gang[i]->start >= bytenr) {
|
||||||
found = gang[i];
|
found = gang[i];
|
||||||
break;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
cur = gang[ret - 1]->start + gang[ret - 1]->len;
|
||||||
|
}
|
||||||
|
out:
|
||||||
return found;
|
return found;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ enum {
 /* write IO error */
 EXTENT_BUFFER_WRITE_ERR,
 EXTENT_BUFFER_NO_CHECK,
+EXTENT_BUFFER_ZONE_FINISH,
 };

 /* these are flags for __process_pages_contig */
@@ -183,8 +184,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 struct btrfs_bio_ctrl *bio_ctrl,
 unsigned int read_flags, u64 *prev_em_start);
 int extent_write_full_page(struct page *page, struct writeback_control *wbc);
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-int mode);
+int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
 int extent_writepages(struct address_space *mapping,
 struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,
@@ -277,14 +277,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 struct page *locked_page,
 u32 bits_to_clear, unsigned long page_ops);
-struct bio *btrfs_bio_alloc(u64 first_byte);
-struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
+struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
 struct bio *btrfs_bio_clone(struct bio *bio);
 struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);

-int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-u64 length, u64 logical, struct page *page,
-unsigned int pg_offset, int mirror_num);
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);

@@ -360,7 +360,7 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
 int i;

 for (i = 0; i < map->num_stripes; i++) {
-struct btrfs_bio_stripe *stripe = &map->stripes[i];
+struct btrfs_io_stripe *stripe = &map->stripes[i];
 struct btrfs_device *device = stripe->dev;

 set_extent_bits_nowait(&device->alloc_state, stripe->physical,
@@ -375,7 +375,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
 int i;

 for (i = 0; i < map->num_stripes; i++) {
-struct btrfs_bio_stripe *stripe = &map->stripes[i];
+struct btrfs_io_stripe *stripe = &map->stripes[i];
 struct btrfs_device *device = stripe->dev;

 __clear_extent_bit(&device->alloc_state, stripe->physical,
@@ -358,7 +358,7 @@ static int search_file_offset_in_bio(struct bio *bio, struct inode *inode,
 * @dst: Buffer of size nblocks * btrfs_super_csum_size() used to return
 * checksum (nblocks = bio->bi_iter.bi_size / fs_info->sectorsize). If
 * NULL, the checksum buffer is allocated and returned in
-* btrfs_io_bio(bio)->csum instead.
+* btrfs_bio(bio)->csum instead.
 *
 * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
 */
@@ -397,19 +397,18 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 return BLK_STS_RESOURCE;

 if (!dst) {
-struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
+struct btrfs_bio *bbio = btrfs_bio(bio);

 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
-btrfs_bio->csum = kmalloc_array(nblocks, csum_size,
-GFP_NOFS);
-if (!btrfs_bio->csum) {
+bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
+if (!bbio->csum) {
 btrfs_free_path(path);
 return BLK_STS_RESOURCE;
 }
 } else {
-btrfs_bio->csum = btrfs_bio->csum_inline;
+bbio->csum = bbio->csum_inline;
 }
-csum = btrfs_bio->csum;
+csum = bbio->csum;
 } else {
 csum = dst;
 }
@@ -709,12 +708,12 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
 index = 0;
 }

-data = kmap_atomic(bvec.bv_page);
-crypto_shash_digest(shash, data + bvec.bv_offset
-+ (i * fs_info->sectorsize),
+data = bvec_kmap_local(&bvec);
+crypto_shash_digest(shash,
+data + (i * fs_info->sectorsize),
 fs_info->sectorsize,
 sums->sums + index);
-kunmap_atomic(data);
+kunmap_local(data);
 index += fs_info->csum_size;
 offset += fs_info->sectorsize;
 this_sum_bytes += fs_info->sectorsize;
@@ -437,9 +437,15 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
 /*
 * unlocks pages after btrfs_file_write is done with them
 */
-static void btrfs_drop_pages(struct page **pages, size_t num_pages)
+static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
+struct page **pages, size_t num_pages,
+u64 pos, u64 copied)
 {
 size_t i;
+u64 block_start = round_down(pos, fs_info->sectorsize);
+u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;
+
+ASSERT(block_len <= U32_MAX);
 for (i = 0; i < num_pages; i++) {
 /* page checked is some magic around finding pages that
 * have been modified without going through btrfs_set_page_dirty
@@ -447,7 +453,8 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
 * accessed as prepare_pages should have marked them accessed
 * in prepare_pages via find_or_create_page()
 */
-ClearPageChecked(pages[i]);
+btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start,
+block_len);
 unlock_page(pages[i]);
 put_page(pages[i]);
 }
@@ -504,7 +511,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
 struct page *p = pages[i];

 btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
-ClearPageChecked(p);
+btrfs_page_clamp_clear_checked(fs_info, p, start_pos, num_bytes);
 btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
 }

@@ -869,7 +876,8 @@ next_slot:
 btrfs_init_data_ref(&ref,
 root->root_key.objectid,
 new_key.objectid,
-args->start - extent_offset);
+args->start - extent_offset,
+0, false);
 ret = btrfs_inc_extent_ref(trans, &ref);
 BUG_ON(ret); /* -ENOMEM */
 }
@@ -955,7 +963,8 @@ delete_extent_item:
 btrfs_init_data_ref(&ref,
 root->root_key.objectid,
 key.objectid,
-key.offset - extent_offset);
+key.offset - extent_offset, 0,
+false);
 ret = btrfs_free_extent(trans, &ref);
 BUG_ON(ret); /* -ENOMEM */
 args->bytes_found += extent_end - key.offset;
@@ -1020,8 +1029,7 @@ delete_extent_item:
 if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
 path->slots[0]++;
 }
-setup_items_for_insert(root, path, &key,
-&args->extent_item_size, 1);
+btrfs_setup_item_for_insert(root, path, &key, args->extent_item_size);
 args->extent_inserted = true;
 }

@@ -1232,7 +1240,7 @@ again:
 btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
 num_bytes, 0);
 btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
-orig_offset);
+orig_offset, 0, false);
 ret = btrfs_inc_extent_ref(trans, &ref);
 if (ret) {
 btrfs_abort_transaction(trans, ret);
@@ -1257,7 +1265,8 @@ again:
 other_end = 0;
 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
 num_bytes, 0);
-btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset);
+btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset,
+0, false);
 if (extent_mergeable(leaf, path->slots[0] + 1,
 ino, bytenr, orig_offset,
 &other_start, &other_end)) {
@@ -1844,7 +1853,7 @@ again:

 btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
 if (ret) {
-btrfs_drop_pages(pages, num_pages);
+btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
 break;
 }

@@ -1852,7 +1861,7 @@ again:
 if (only_release_metadata)
 btrfs_check_nocow_unlock(BTRFS_I(inode));

-btrfs_drop_pages(pages, num_pages);
+btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);

 cond_resched();

@@ -2012,7 +2021,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 * have opened a file as writable, we have to stop this write operation
 * to ensure consistency.
 */
-if (test_bit(BTRFS_FS_STATE_ERROR, &inode->root->fs_info->fs_state))
+if (BTRFS_FS_ERROR(inode->root->fs_info))
 return -EROFS;

 if (!(iocb->ki_flags & IOCB_DIRECT) &&
@@ -2620,7 +2629,7 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
 extent_info->disk_len, 0);
 ref_offset = extent_info->file_offset - extent_info->data_offset;
 btrfs_init_data_ref(&ref, root->root_key.objectid,
-btrfs_ino(inode), ref_offset);
+btrfs_ino(inode), ref_offset, 0, false);
 ret = btrfs_inc_extent_ref(trans, &ref);
 }

@@ -22,6 +22,7 @@
 #include "delalloc-space.h"
 #include "block-group.h"
 #include "discard.h"
+#include "subpage.h"

 #define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
 #define MAX_CACHE_BYTES_PER_GIG SZ_64K
@@ -411,7 +412,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)

 for (i = 0; i < io_ctl->num_pages; i++) {
 if (io_ctl->pages[i]) {
-ClearPageChecked(io_ctl->pages[i]);
+btrfs_page_clear_checked(io_ctl->fs_info,
+io_ctl->pages[i],
+page_offset(io_ctl->pages[i]),
+PAGE_SIZE);
 unlock_page(io_ctl->pages[i]);
 put_page(io_ctl->pages[i]);
 }
@@ -2539,10 +2543,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 u64 offset = bytenr - block_group->start;
 u64 to_free, to_unusable;
 const int bg_reclaim_threshold = READ_ONCE(fs_info->bg_reclaim_threshold);
+bool initial = (size == block_group->length);
+u64 reclaimable_unusable;

+WARN_ON(!initial && offset + size > block_group->zone_capacity);
+
 spin_lock(&ctl->tree_lock);
 if (!used)
 to_free = size;
+else if (initial)
+to_free = block_group->zone_capacity;
 else if (offset >= block_group->alloc_offset)
 to_free = size;
 else if (offset + size <= block_group->alloc_offset)
@@ -2565,12 +2575,15 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
 spin_unlock(&block_group->lock);
 }

+reclaimable_unusable = block_group->zone_unusable -
+(block_group->length - block_group->zone_capacity);
 /* All the region is now unusable. Mark it as unused and reclaim */
 if (block_group->zone_unusable == block_group->length) {
 btrfs_mark_bg_unused(block_group);
 } else if (bg_reclaim_threshold &&
-block_group->zone_unusable >=
-div_factor_fine(block_group->length, bg_reclaim_threshold)) {
+reclaimable_unusable >=
+div_factor_fine(block_group->zone_capacity,
+bg_reclaim_threshold)) {
 btrfs_mark_bg_to_reclaim(block_group);
 }

@@ -2754,8 +2767,9 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
 * out the free space after the allocation offset.
 */
 if (btrfs_is_zoned(fs_info)) {
-btrfs_info(fs_info, "free space %llu",
-block_group->length - block_group->alloc_offset);
+btrfs_info(fs_info, "free space %llu active %d",
+block_group->zone_capacity - block_group->alloc_offset,
+block_group->zone_is_active);
 return;
 }

fs/btrfs/inode.c (539 changes): file diff suppressed because it is too large
fs/btrfs/ioctl.c (1012 changes): file diff suppressed because it is too large
@@ -96,11 +96,12 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);

 #ifdef CONFIG_BTRFS_DEBUG
-static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
-lockdep_assert_held(&eb->lock);
+static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)
+{
+lockdep_assert_held_write(&eb->lock);
 }
 #else
-static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
+static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb) { }
 #endif

 void btrfs_unlock_up_safe(struct btrfs_path *path, int level);
fs/btrfs/lzo.c (276 changes):
@@ -32,19 +32,19 @@
 * payload.
 * One regular LZO compressed extent can have one or more segments.
 * For inlined LZO compressed extent, only one segment is allowed.
-* One segment represents at most one page of uncompressed data.
+* One segment represents at most one sector of uncompressed data.
 *
 * 2.1 Segment header
 * Fixed size. LZO_LEN (4) bytes long, LE32.
 * Records the total size of the segment (not including the header).
-* Segment header never crosses page boundary, thus it's possible to
-* have at most 3 padding zeros at the end of the page.
+* Segment header never crosses sector boundary, thus it's possible to
+* have at most 3 padding zeros at the end of the sector.
 *
 * 2.2 Data Payload
-* Variable size. Size up limit should be lzo1x_worst_compress(PAGE_SIZE)
-* which is 4419 for a 4KiB page.
+* Variable size. Size up limit should be lzo1x_worst_compress(sectorsize)
+* which is 4419 for a 4KiB sectorsize.
 *
-* Example:
+* Example with 4K sectorsize:
 * Page 1:
 * 0 0x2 0x4 0x6 0x8 0xa 0xc 0xe 0x10
 * 0x0000 | Header | SegHdr 01 | Data payload 01 ... |
@@ -112,170 +112,174 @@ static inline size_t read_compress_length(const char *buf)
 return le32_to_cpu(dlen);
 }

+/*
+* Will do:
+*
+* - Write a segment header into the destination
+* - Copy the compressed buffer into the destination
+* - Make sure we have enough space in the last sector to fit a segment header
+* If not, we will pad at most (LZO_LEN (4)) - 1 bytes of zeros.
+*
+* Will allocate new pages when needed.
+*/
+static int copy_compressed_data_to_page(char *compressed_data,
+size_t compressed_size,
+struct page **out_pages,
+u32 *cur_out,
+const u32 sectorsize)
+{
+u32 sector_bytes_left;
+u32 orig_out;
+struct page *cur_page;
+char *kaddr;
+
+/*
+* We never allow a segment header crossing sector boundary, previous
+* run should ensure we have enough space left inside the sector.
+*/
+ASSERT((*cur_out / sectorsize) == (*cur_out + LZO_LEN - 1) / sectorsize);
+
+cur_page = out_pages[*cur_out / PAGE_SIZE];
+/* Allocate a new page */
+if (!cur_page) {
+cur_page = alloc_page(GFP_NOFS);
+if (!cur_page)
+return -ENOMEM;
+out_pages[*cur_out / PAGE_SIZE] = cur_page;
+}
+
+kaddr = kmap(cur_page);
+write_compress_length(kaddr + offset_in_page(*cur_out),
+compressed_size);
+*cur_out += LZO_LEN;
+
+orig_out = *cur_out;
+
+/* Copy compressed data */
+while (*cur_out - orig_out < compressed_size) {
+u32 copy_len = min_t(u32, sectorsize - *cur_out % sectorsize,
+orig_out + compressed_size - *cur_out);
+
+kunmap(cur_page);
+cur_page = out_pages[*cur_out / PAGE_SIZE];
+/* Allocate a new page */
+if (!cur_page) {
+cur_page = alloc_page(GFP_NOFS);
+if (!cur_page)
+return -ENOMEM;
+out_pages[*cur_out / PAGE_SIZE] = cur_page;
+}
+kaddr = kmap(cur_page);
+
+memcpy(kaddr + offset_in_page(*cur_out),
+compressed_data + *cur_out - orig_out, copy_len);
+
+*cur_out += copy_len;
+}
+
+/*
+* Check if we can fit the next segment header into the remaining space
+* of the sector.
+*/
+sector_bytes_left = round_up(*cur_out, sectorsize) - *cur_out;
+if (sector_bytes_left >= LZO_LEN || sector_bytes_left == 0)
+goto out;
+
+/* The remaining size is not enough, pad it with zeros */
+memset(kaddr + offset_in_page(*cur_out), 0,
+sector_bytes_left);
+*cur_out += sector_bytes_left;
+
+out:
+kunmap(cur_page);
+return 0;
+}
+
 int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
 u64 start, struct page **pages, unsigned long *out_pages,
 unsigned long *total_in, unsigned long *total_out)
 {
 struct workspace *workspace = list_entry(ws, struct workspace, list);
+const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
+struct page *page_in = NULL;
+char *sizes_ptr;
 int ret = 0;
-char *data_in;
-char *cpage_out, *sizes_ptr;
-int nr_pages = 0;
-struct page *in_page = NULL;
-struct page *out_page = NULL;
-unsigned long bytes_left;
-unsigned long len = *total_out;
-unsigned long nr_dest_pages = *out_pages;
-const unsigned long max_out = nr_dest_pages * PAGE_SIZE;
-size_t in_len;
-size_t out_len;
-char *buf;
-unsigned long tot_in = 0;
-unsigned long tot_out = 0;
-unsigned long pg_bytes_left;
-unsigned long out_offset;
-unsigned long bytes;
+/* Points to the file offset of input data */
+u64 cur_in = start;
+/* Points to the current output byte */
+u32 cur_out = 0;
+u32 len = *total_out;

 *out_pages = 0;
 *total_out = 0;
 *total_in = 0;

-in_page = find_get_page(mapping, start >> PAGE_SHIFT);
-data_in = kmap(in_page);
-
 /*
-* store the size of all chunks of compressed data in
-* the first 4 bytes
+* Skip the header for now, we will later come back and write the total
+* compressed size
 */
-out_page = alloc_page(GFP_NOFS);
-if (out_page == NULL) {
-ret = -ENOMEM;
-goto out;
-}
-cpage_out = kmap(out_page);
-out_offset = LZO_LEN;
-tot_out = LZO_LEN;
-pages[0] = out_page;
-nr_pages = 1;
-pg_bytes_left = PAGE_SIZE - LZO_LEN;
+cur_out += LZO_LEN;
+while (cur_in < start + len) {
+char *data_in;
+const u32 sectorsize_mask = sectorsize - 1;
+u32 sector_off = (cur_in - start) & sectorsize_mask;
+u32 in_len;
+size_t out_len;

-/* compress at most one page of data each time */
-in_len = min(len, PAGE_SIZE);
-while (tot_in < len) {
-ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
-&out_len, workspace->mem);
-if (ret != LZO_E_OK) {
-pr_debug("BTRFS: lzo in loop returned %d\n",
-ret);
+/* Get the input page first */
+if (!page_in) {
+page_in = find_get_page(mapping, cur_in >> PAGE_SHIFT);
+ASSERT(page_in);
+}
+
+/* Compress at most one sector of data each time */
+in_len = min_t(u32, start + len - cur_in, sectorsize - sector_off);
+ASSERT(in_len);
+data_in = kmap(page_in);
+ret = lzo1x_1_compress(data_in +
+offset_in_page(cur_in), in_len,
+workspace->cbuf, &out_len,
+workspace->mem);
+kunmap(page_in);
+if (ret < 0) {
+pr_debug("BTRFS: lzo in loop returned %d\n", ret);
 ret = -EIO;
 goto out;
 }

-/* store the size of this chunk of compressed data */
-write_compress_length(cpage_out + out_offset, out_len);
-tot_out += LZO_LEN;
-out_offset += LZO_LEN;
-pg_bytes_left -= LZO_LEN;
+ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
+pages, &cur_out, sectorsize);
+if (ret < 0)
+goto out;

-tot_in += in_len;
-tot_out += out_len;
+cur_in += in_len;

-/* copy bytes from the working buffer into the pages */
-buf = workspace->cbuf;
-while (out_len) {
-bytes = min_t(unsigned long, pg_bytes_left, out_len);
-
-memcpy(cpage_out + out_offset, buf, bytes);
-
-out_len -= bytes;
-pg_bytes_left -= bytes;
-buf += bytes;
-out_offset += bytes;
-
 /*
-* we need another page for writing out.
-*
-* Note if there's less than 4 bytes left, we just
-* skip to a new page.
+* Check if we're making it bigger after two sectors. And if
+* it is so, give up.
 */
-if ((out_len == 0 && pg_bytes_left < LZO_LEN) ||
-pg_bytes_left == 0) {
-if (pg_bytes_left) {
-memset(cpage_out + out_offset, 0,
-pg_bytes_left);
-tot_out += pg_bytes_left;
-}
-
-/* we're done, don't allocate new page */
-if (out_len == 0 && tot_in >= len)
-break;
-
-kunmap(out_page);
-if (nr_pages == nr_dest_pages) {
-out_page = NULL;
+if (cur_in - start > sectorsize * 2 && cur_in - start < cur_out) {
 ret = -E2BIG;
 goto out;
 }

-out_page = alloc_page(GFP_NOFS);
-if (out_page == NULL) {
-ret = -ENOMEM;
-goto out;
-}
-cpage_out = kmap(out_page);
-pages[nr_pages++] = out_page;
-
-pg_bytes_left = PAGE_SIZE;
-out_offset = 0;
-}
+/* Check if we have reached page boundary */
+if (IS_ALIGNED(cur_in, PAGE_SIZE)) {
+put_page(page_in);
+page_in = NULL;
 }
 }

-/* we're making it bigger, give up */
-if (tot_in > 8192 && tot_in < tot_out) {
-ret = -E2BIG;
-goto out;
-}
-
-/* we're all done */
-if (tot_in >= len)
-break;
-
-if (tot_out > max_out)
-break;
-
-bytes_left = len - tot_in;
-kunmap(in_page);
-put_page(in_page);
-
-start += PAGE_SIZE;
-in_page = find_get_page(mapping, start >> PAGE_SHIFT);
-data_in = kmap(in_page);
-in_len = min(bytes_left, PAGE_SIZE);
-}
-
-if (tot_out >= tot_in) {
-ret = -E2BIG;
-goto out;
-}
-
-/* store the size of all chunks of compressed data */
+/* Store the size of all chunks of compressed data */
 sizes_ptr = kmap_local_page(pages[0]);
-write_compress_length(sizes_ptr, tot_out);
+write_compress_length(sizes_ptr, cur_out);
 kunmap_local(sizes_ptr);

 ret = 0;
-*total_out = tot_out;
-*total_in = tot_in;
+*total_out = cur_out;
+*total_in = cur_in - start;
 out:
-*out_pages = nr_pages;
-if (out_page)
-kunmap(out_page);
-
-if (in_page) {
-kunmap(in_page);
-put_page(in_page);
-}
-
+*out_pages = DIV_ROUND_UP(cur_out, PAGE_SIZE);
 return ret;
 }

@@ -60,8 +60,7 @@ enum btrfs_rbio_ops {
 };

 struct btrfs_raid_bio {
-struct btrfs_fs_info *fs_info;
-struct btrfs_bio *bbio;
+struct btrfs_io_context *bioc;

 /* while we're doing rmw on a stripe
 * we put it into a hash table so we can
@@ -192,7 +191,7 @@ static void scrub_parity_work(struct btrfs_work *work);
 static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
 {
 btrfs_init_work(&rbio->work, work_func, NULL, NULL);
-btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
+btrfs_queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
 }

 /*
@@ -271,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 */
 static int rbio_bucket(struct btrfs_raid_bio *rbio)
 {
-u64 num = rbio->bbio->raid_map[0];
+u64 num = rbio->bioc->raid_map[0];

 /*
 * we shift down quite a bit. We're using byte
@@ -345,7 +344,7 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
 return;

-table = rbio->fs_info->stripe_hash_table;
+table = rbio->bioc->fs_info->stripe_hash_table;
 h = table->table + bucket;

 /* hold the lock for the bucket because we may be
@@ -400,7 +399,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
 if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
 return;

-table = rbio->fs_info->stripe_hash_table;
+table = rbio->bioc->fs_info->stripe_hash_table;

 spin_lock_irqsave(&table->cache_lock, flags);
 __remove_rbio_from_cache(rbio);
@@ -460,7 +459,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio)
 if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
 return;

-table = rbio->fs_info->stripe_hash_table;
+table = rbio->bioc->fs_info->stripe_hash_table;

 spin_lock_irqsave(&table->cache_lock, flags);
 spin_lock(&rbio->bio_list_lock);
@@ -559,8 +558,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
 test_bit(RBIO_CACHE_BIT, &cur->flags))
 return 0;

-if (last->bbio->raid_map[0] !=
-cur->bbio->raid_map[0])
+if (last->bioc->raid_map[0] != cur->bioc->raid_map[0])
 return 0;

 /* we can't merge with different operations */
@@ -669,11 +667,11 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
 struct btrfs_raid_bio *cache_drop = NULL;
 int ret = 0;

-h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
+h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);

 spin_lock_irqsave(&h->lock, flags);
 list_for_each_entry(cur, &h->hash_list, hash_list) {
-if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
+if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0])
 continue;

 spin_lock(&cur->bio_list_lock);
@@ -751,7 +749,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 int keep_cache = 0;

 bucket = rbio_bucket(rbio);
-h = rbio->fs_info->stripe_hash_table->table + bucket;
+h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;

 if (list_empty(&rbio->plug_list))
 cache_rbio(rbio);
@@ -838,7 +836,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio)
 }
 }

-btrfs_put_bbio(rbio->bbio);
+btrfs_put_bioc(rbio->bioc);
 kfree(rbio);
 }

@@ -865,7 +863,7 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
 struct bio *extra;

 if (rbio->generic_bio_cnt)
-btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt);
+btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);

 /*
 * At this moment, rbio->bio_list is empty, however since rbio does not
@@ -906,7 +904,7 @@ static void raid_write_end_io(struct bio *bio)

 /* OK, we have read all the stripes we need to. */
 max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
-0 : rbio->bbio->max_errors;
+0 : rbio->bioc->max_errors;
 if (atomic_read(&rbio->error) > max_errors)
 err = BLK_STS_IOERR;

@@ -961,12 +959,12 @@ static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
 * this does not allocate any pages for rbio->pages.
 */
 static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
-struct btrfs_bio *bbio,
+struct btrfs_io_context *bioc,
 u64 stripe_len)
 {
 struct btrfs_raid_bio *rbio;
 int nr_data = 0;
-int real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
+int real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
 int num_pages = rbio_nr_pages(stripe_len, real_stripes);
 int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE);
 void *p;
@@ -987,8 +985,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 spin_lock_init(&rbio->bio_list_lock);
 INIT_LIST_HEAD(&rbio->stripe_cache);
 INIT_LIST_HEAD(&rbio->hash_list);
-rbio->bbio = bbio;
-rbio->fs_info = fs_info;
+rbio->bioc = bioc;
 rbio->stripe_len = stripe_len;
 rbio->nr_pages = num_pages;
 rbio->real_stripes = real_stripes;
@@ -1015,9 +1012,9 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
 CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages));
 #undef CONSUME_ALLOC

-if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
 nr_data = real_stripes - 1;
-else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
 nr_data = real_stripes - 2;
 else
 BUG();
@@ -1077,10 +1074,10 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 struct bio *last = bio_list->tail;
 int ret;
 struct bio *bio;
-struct btrfs_bio_stripe *stripe;
+struct btrfs_io_stripe *stripe;
 u64 disk_start;

-stripe = &rbio->bbio->stripes[stripe_nr];
+stripe = &rbio->bioc->stripes[stripe_nr];
 disk_start = stripe->physical + (page_index << PAGE_SHIFT);

 /* if the device is missing, just fail this stripe */
@@ -1105,8 +1102,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
 }

 /* put a new bio on the list */
-bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
-btrfs_io_bio(bio)->device = stripe->dev;
+bio = btrfs_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1);
+btrfs_bio(bio)->device = stripe->dev;
 bio->bi_iter.bi_size = 0;
 bio_set_dev(bio, stripe->dev->bdev);
 bio->bi_iter.bi_sector = disk_start >> 9;
@@ -1155,11 +1152,11 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
 int i = 0;

 start = bio->bi_iter.bi_sector << 9;
-stripe_offset = start - rbio->bbio->raid_map[0];
+stripe_offset = start - rbio->bioc->raid_map[0];
 page_index = stripe_offset >> PAGE_SHIFT;

 if (bio_flagged(bio, BIO_CLONED))
-bio->bi_iter = btrfs_io_bio(bio)->iter;
+bio->bi_iter = btrfs_bio(bio)->iter;

 bio_for_each_segment(bvec, bio, iter) {
 rbio->bio_pages[page_index + i] = bvec.bv_page;
@@ -1179,7 +1176,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
 */
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 {
-struct btrfs_bio *bbio = rbio->bbio;
+struct btrfs_io_context *bioc = rbio->bioc;
 void **pointers = rbio->finish_pointers;
 int nr_data = rbio->nr_data;
 int stripe;
@@ -1284,11 +1281,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 }
 }

-if (likely(!bbio->num_tgtdevs))
+if (likely(!bioc->num_tgtdevs))
 goto write_data;

 for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
-if (!bbio->tgtdev_map[stripe])
+if (!bioc->tgtdev_map[stripe])
 continue;

 for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
@@ -1302,7 +1299,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
 }

 ret = rbio_add_io_page(rbio, &bio_list, page,
-rbio->bbio->tgtdev_map[stripe],
+rbio->bioc->tgtdev_map[stripe],
 pagenr, rbio->stripe_len);
 if (ret)
 goto cleanup;
@@ -1339,12 +1336,12 @@ static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 {
 u64 physical = bio->bi_iter.bi_sector;
 int i;
-struct btrfs_bio_stripe *stripe;
+struct btrfs_io_stripe *stripe;

 physical <<= 9;

-for (i = 0; i < rbio->bbio->num_stripes; i++) {
-stripe = &rbio->bbio->stripes[i];
+for (i = 0; i < rbio->bioc->num_stripes; i++) {
+stripe = &rbio->bioc->stripes[i];
 if (in_range(physical, stripe->physical, rbio->stripe_len) &&
 stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) {
 return i;
@@ -1365,7 +1362,7 @@ static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
 int i;

 for (i = 0; i < rbio->nr_data; i++) {
-u64 stripe_start = rbio->bbio->raid_map[i];
+u64 stripe_start = rbio->bioc->raid_map[i];

 if (in_range(logical, stripe_start, rbio->stripe_len))
 return i;
@@ -1456,7 +1453,7 @@ static void raid_rmw_end_io(struct bio *bio)
 if (!atomic_dec_and_test(&rbio->stripes_pending))
 return;

-if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
 goto cleanup;

 /*
@@ -1538,8 +1535,8 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 }

 /*
-* the bbio may be freed once we submit the last bio. Make sure
-* not to touch it after that
+* The bioc may be freed once we submit the last bio. Make sure not to
+* touch it after that.
 */
 atomic_set(&rbio->stripes_pending, bios_to_read);
 while ((bio = bio_list_pop(&bio_list))) {
@@ -1547,7 +1544,7 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
 bio->bi_end_io = raid_rmw_end_io;
 bio->bi_opf = REQ_OP_READ;

-btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
+btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

 submit_bio(bio);
 }
@@ -1719,17 +1716,18 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
 /*
 * our main entry point for writes from the rest of the FS.
 */
-int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
-struct btrfs_bio *bbio, u64 stripe_len)
+int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
+u64 stripe_len)
 {
+struct btrfs_fs_info *fs_info = bioc->fs_info;
 struct btrfs_raid_bio *rbio;
 struct btrfs_plug_cb *plug = NULL;
 struct blk_plug_cb *cb;
 int ret;

-rbio = alloc_rbio(fs_info, bbio, stripe_len);
+rbio = alloc_rbio(fs_info, bioc, stripe_len);
 if (IS_ERR(rbio)) {
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
 return PTR_ERR(rbio);
 }
 bio_list_add(&rbio->bio_list, bio);
@@ -1842,7 +1840,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 }

 /* all raid6 handling here */
-if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) {
+if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
 /*
 * single failure, rebuild from parity raid5
 * style
@@ -1874,8 +1872,8 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 * here due to a crc mismatch and we can't give them the
 * data they want
 */
-if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) {
-if (rbio->bbio->raid_map[faila] ==
+if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
+if (rbio->bioc->raid_map[faila] ==
 RAID5_P_STRIPE) {
 err = BLK_STS_IOERR;
 goto cleanup;
@@ -1887,7 +1885,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 goto pstripe;
 }

-if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) {
+if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
 raid6_datap_recov(rbio->real_stripes,
 PAGE_SIZE, faila, pointers);
 } else {
@@ -2006,7 +2004,7 @@ static void raid_recover_end_io(struct bio *bio)
 if (!atomic_dec_and_test(&rbio->stripes_pending))
 return;

-if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
 rbio_orig_end_io(rbio, BLK_STS_IOERR);
 else
 __raid_recover_end_io(rbio);
@@ -2074,7 +2072,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 * were up to date, or we might have no bios to read because
 * the devices were gone.
 */
-if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
+if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
 __raid_recover_end_io(rbio);
 return 0;
 } else {
@@ -2083,8 +2081,8 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 }

 /*
-* the bbio may be freed once we submit the last bio. Make sure
-* not to touch it after that
+* The bioc may be freed once we submit the last bio. Make sure not to
+* touch it after that.
 */
 atomic_set(&rbio->stripes_pending, bios_to_read);
 while ((bio = bio_list_pop(&bio_list))) {
@@ -2092,7 +2090,7 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
 bio->bi_end_io = raid_recover_end_io;
 bio->bi_opf = REQ_OP_READ;

-btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
+btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);

 submit_bio(bio);
 }
@@ -2116,22 +2114,22 @@ cleanup:
|||||||
* so we assume the bio they send down corresponds to a failed part
|
* so we assume the bio they send down corresponds to a failed part
|
||||||
* of the drive.
|
* of the drive.
|
||||||
*/
|
*/
|
||||||
int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
|
||||||
struct btrfs_bio *bbio, u64 stripe_len,
|
u64 stripe_len, int mirror_num, int generic_io)
|
||||||
int mirror_num, int generic_io)
|
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||||
struct btrfs_raid_bio *rbio;
|
struct btrfs_raid_bio *rbio;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (generic_io) {
|
if (generic_io) {
|
||||||
ASSERT(bbio->mirror_num == mirror_num);
|
ASSERT(bioc->mirror_num == mirror_num);
|
||||||
btrfs_io_bio(bio)->mirror_num = mirror_num;
|
btrfs_bio(bio)->mirror_num = mirror_num;
|
||||||
}
|
}
|
||||||
|
|
||||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
rbio = alloc_rbio(fs_info, bioc, stripe_len);
|
||||||
if (IS_ERR(rbio)) {
|
if (IS_ERR(rbio)) {
|
||||||
if (generic_io)
|
if (generic_io)
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bioc(bioc);
|
||||||
return PTR_ERR(rbio);
|
return PTR_ERR(rbio);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2142,11 +2140,11 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||||||
rbio->faila = find_logical_bio_stripe(rbio, bio);
|
rbio->faila = find_logical_bio_stripe(rbio, bio);
|
||||||
if (rbio->faila == -1) {
|
if (rbio->faila == -1) {
|
||||||
btrfs_warn(fs_info,
|
btrfs_warn(fs_info,
|
||||||
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)",
|
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
|
||||||
__func__, bio->bi_iter.bi_sector << 9,
|
__func__, bio->bi_iter.bi_sector << 9,
|
||||||
(u64)bio->bi_iter.bi_size, bbio->map_type);
|
(u64)bio->bi_iter.bi_size, bioc->map_type);
|
||||||
if (generic_io)
|
if (generic_io)
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bioc(bioc);
|
||||||
kfree(rbio);
|
kfree(rbio);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
@@ -2155,7 +2153,7 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||||||
btrfs_bio_counter_inc_noblocked(fs_info);
|
btrfs_bio_counter_inc_noblocked(fs_info);
|
||||||
rbio->generic_bio_cnt = 1;
|
rbio->generic_bio_cnt = 1;
|
||||||
} else {
|
} else {
|
||||||
btrfs_get_bbio(bbio);
|
btrfs_get_bioc(bioc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -2214,23 +2212,23 @@ static void read_rebuild_work(struct btrfs_work *work)
|
|||||||
/*
|
/*
|
||||||
* The following code is used to scrub/replace the parity stripe
|
* The following code is used to scrub/replace the parity stripe
|
||||||
*
|
*
|
||||||
* Caller must have already increased bio_counter for getting @bbio.
|
* Caller must have already increased bio_counter for getting @bioc.
|
||||||
*
|
*
|
||||||
* Note: We need make sure all the pages that add into the scrub/replace
|
* Note: We need make sure all the pages that add into the scrub/replace
|
||||||
* raid bio are correct and not be changed during the scrub/replace. That
|
* raid bio are correct and not be changed during the scrub/replace. That
|
||||||
* is those pages just hold metadata or file data with checksum.
|
* is those pages just hold metadata or file data with checksum.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct btrfs_raid_bio *
|
struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
|
||||||
raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
struct btrfs_io_context *bioc,
|
||||||
struct btrfs_bio *bbio, u64 stripe_len,
|
u64 stripe_len, struct btrfs_device *scrub_dev,
|
||||||
struct btrfs_device *scrub_dev,
|
|
||||||
unsigned long *dbitmap, int stripe_nsectors)
|
unsigned long *dbitmap, int stripe_nsectors)
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||||
struct btrfs_raid_bio *rbio;
|
struct btrfs_raid_bio *rbio;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
rbio = alloc_rbio(fs_info, bbio, stripe_len);
|
rbio = alloc_rbio(fs_info, bioc, stripe_len);
|
||||||
if (IS_ERR(rbio))
|
if (IS_ERR(rbio))
|
||||||
return NULL;
|
return NULL;
|
||||||
bio_list_add(&rbio->bio_list, bio);
|
bio_list_add(&rbio->bio_list, bio);
|
||||||
@@ -2242,12 +2240,12 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||||||
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
|
rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted
|
* After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
|
||||||
* to the end position, so this search can start from the first parity
|
* to the end position, so this search can start from the first parity
|
||||||
* stripe.
|
* stripe.
|
||||||
*/
|
*/
|
||||||
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
|
for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
|
||||||
if (bbio->stripes[i].dev == scrub_dev) {
|
if (bioc->stripes[i].dev == scrub_dev) {
|
||||||
rbio->scrubp = i;
|
rbio->scrubp = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -2260,7 +2258,7 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||||||
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
|
bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have already increased bio_counter when getting bbio, record it
|
* We have already increased bio_counter when getting bioc, record it
|
||||||
* so we can free it at rbio_orig_end_io().
|
* so we can free it at rbio_orig_end_io().
|
||||||
*/
|
*/
|
||||||
rbio->generic_bio_cnt = 1;
|
rbio->generic_bio_cnt = 1;
|
||||||
@@ -2275,10 +2273,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
|
|||||||
int stripe_offset;
|
int stripe_offset;
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
ASSERT(logical >= rbio->bbio->raid_map[0]);
|
ASSERT(logical >= rbio->bioc->raid_map[0]);
|
||||||
ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
|
ASSERT(logical + PAGE_SIZE <= rbio->bioc->raid_map[0] +
|
||||||
rbio->stripe_len * rbio->nr_data);
|
rbio->stripe_len * rbio->nr_data);
|
||||||
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
|
stripe_offset = (int)(logical - rbio->bioc->raid_map[0]);
|
||||||
index = stripe_offset >> PAGE_SHIFT;
|
index = stripe_offset >> PAGE_SHIFT;
|
||||||
rbio->bio_pages[index] = page;
|
rbio->bio_pages[index] = page;
|
||||||
}
|
}
|
||||||
@@ -2312,7 +2310,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
|
|||||||
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||||
int need_check)
|
int need_check)
|
||||||
{
|
{
|
||||||
struct btrfs_bio *bbio = rbio->bbio;
|
struct btrfs_io_context *bioc = rbio->bioc;
|
||||||
void **pointers = rbio->finish_pointers;
|
void **pointers = rbio->finish_pointers;
|
||||||
unsigned long *pbitmap = rbio->finish_pbitmap;
|
unsigned long *pbitmap = rbio->finish_pbitmap;
|
||||||
int nr_data = rbio->nr_data;
|
int nr_data = rbio->nr_data;
|
||||||
@@ -2335,7 +2333,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
|||||||
else
|
else
|
||||||
BUG();
|
BUG();
|
||||||
|
|
||||||
if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
|
if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) {
|
||||||
is_replace = 1;
|
is_replace = 1;
|
||||||
bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
|
bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages);
|
||||||
}
|
}
|
||||||
@@ -2435,7 +2433,7 @@ writeback:
|
|||||||
|
|
||||||
page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
|
page = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
|
||||||
ret = rbio_add_io_page(rbio, &bio_list, page,
|
ret = rbio_add_io_page(rbio, &bio_list, page,
|
||||||
bbio->tgtdev_map[rbio->scrubp],
|
bioc->tgtdev_map[rbio->scrubp],
|
||||||
pagenr, rbio->stripe_len);
|
pagenr, rbio->stripe_len);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@@ -2483,7 +2481,7 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
|
|||||||
*/
|
*/
|
||||||
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
|
static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
|
||||||
{
|
{
|
||||||
if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
|
if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|
||||||
if (rbio->faila >= 0 || rbio->failb >= 0) {
|
if (rbio->faila >= 0 || rbio->failb >= 0) {
|
||||||
@@ -2504,7 +2502,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
|
|||||||
* the data, so the capability of the repair is declined.
|
* the data, so the capability of the repair is declined.
|
||||||
* (In the case of RAID5, we can not repair anything)
|
* (In the case of RAID5, we can not repair anything)
|
||||||
*/
|
*/
|
||||||
if (dfail > rbio->bbio->max_errors - 1)
|
if (dfail > rbio->bioc->max_errors - 1)
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -2625,8 +2623,8 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the bbio may be freed once we submit the last bio. Make sure
|
* The bioc may be freed once we submit the last bio. Make sure not to
|
||||||
* not to touch it after that
|
* touch it after that.
|
||||||
*/
|
*/
|
||||||
atomic_set(&rbio->stripes_pending, bios_to_read);
|
atomic_set(&rbio->stripes_pending, bios_to_read);
|
||||||
while ((bio = bio_list_pop(&bio_list))) {
|
while ((bio = bio_list_pop(&bio_list))) {
|
||||||
@@ -2634,7 +2632,7 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
|
|||||||
bio->bi_end_io = raid56_parity_scrub_end_io;
|
bio->bi_end_io = raid56_parity_scrub_end_io;
|
||||||
bio->bi_opf = REQ_OP_READ;
|
bio->bi_opf = REQ_OP_READ;
|
||||||
|
|
||||||
btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
btrfs_bio_wq_end_io(rbio->bioc->fs_info, bio, BTRFS_WQ_ENDIO_RAID56);
|
||||||
|
|
||||||
submit_bio(bio);
|
submit_bio(bio);
|
||||||
}
|
}
|
||||||
@@ -2670,12 +2668,13 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
|
|||||||
/* The following code is used for dev replace of a missing RAID 5/6 device. */
|
/* The following code is used for dev replace of a missing RAID 5/6 device. */
|
||||||
|
|
||||||
struct btrfs_raid_bio *
|
struct btrfs_raid_bio *
|
||||||
raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
|
||||||
struct btrfs_bio *bbio, u64 length)
|
u64 length)
|
||||||
{
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = bioc->fs_info;
|
||||||
struct btrfs_raid_bio *rbio;
|
struct btrfs_raid_bio *rbio;
|
||||||
|
|
||||||
rbio = alloc_rbio(fs_info, bbio, length);
|
rbio = alloc_rbio(fs_info, bioc, length);
|
||||||
if (IS_ERR(rbio))
|
if (IS_ERR(rbio))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
@@ -2695,7 +2694,7 @@ raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When we get bbio, we have already increased bio_counter, record it
|
* When we get bioc, we have already increased bio_counter, record it
|
||||||
* so we can free it at rbio_orig_end_io()
|
* so we can free it at rbio_orig_end_io()
|
||||||
*/
|
*/
|
||||||
rbio->generic_bio_cnt = 1;
|
rbio->generic_bio_cnt = 1;
|
||||||
|
|||||||
@@ -30,25 +30,23 @@ static inline int nr_data_stripes(const struct map_lookup *map)
struct btrfs_raid_bio;
struct btrfs_device;

-int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio,
-struct btrfs_bio *bbio, u64 stripe_len,
-int mirror_num, int generic_io);
-int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio,
-struct btrfs_bio *bbio, u64 stripe_len);
+int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
+u64 stripe_len, int mirror_num, int generic_io);
+int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
+u64 stripe_len);

void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
u64 logical);

-struct btrfs_raid_bio *
-raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
-struct btrfs_bio *bbio, u64 stripe_len,
+struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
+struct btrfs_io_context *bioc, u64 stripe_len,
struct btrfs_device *scrub_dev,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);

struct btrfs_raid_bio *
-raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio,
-struct btrfs_bio *bbio, u64 length);
+raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc,
+u64 length);
void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);

int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
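The raid56 entry points above now take the btrfs_io_context (bioc) directly and derive fs_info from bioc->fs_info, so callers no longer pass fs_info as a separate argument. A minimal caller sketch under the new prototypes; the wrapper function itself is an assumption for illustration, not part of the patch:

/* Hypothetical caller sketch: submit a RAID5/6 full-stripe write. */
static int submit_raid56_write(struct bio *bio, struct btrfs_io_context *bioc,
			       u64 stripe_len)
{
	/*
	 * fs_info is no longer passed explicitly; raid56_parity_write()
	 * reads it from bioc->fs_info.  On the error path the helper drops
	 * the bioc reference itself, so the caller must not put it again.
	 */
	return raid56_parity_write(bio, bioc, stripe_len);
}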
@@ -227,7 +227,7 @@ start_machine:
}

static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
-struct btrfs_bio *bbio)
+struct btrfs_io_context *bioc)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
@@ -275,11 +275,11 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
kref_init(&zone->refcnt);
zone->elems = 0;
zone->device = dev; /* our device always sits at index 0 */
-for (i = 0; i < bbio->num_stripes; ++i) {
+for (i = 0; i < bioc->num_stripes; ++i) {
/* bounds have already been checked */
-zone->devs[i] = bbio->stripes[i].dev;
+zone->devs[i] = bioc->stripes[i].dev;
}
-zone->ndevs = bbio->num_stripes;
+zone->ndevs = bioc->num_stripes;

spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
@@ -309,7 +309,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
int ret;
struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u64 length;
@@ -345,28 +345,28 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
*/
length = fs_info->nodesize;
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
-&length, &bbio, 0);
-if (ret || !bbio || length < fs_info->nodesize)
+&length, &bioc, 0);
+if (ret || !bioc || length < fs_info->nodesize)
goto error;

-if (bbio->num_stripes > BTRFS_MAX_MIRRORS) {
+if (bioc->num_stripes > BTRFS_MAX_MIRRORS) {
btrfs_err(fs_info,
"readahead: more than %d copies not supported",
BTRFS_MAX_MIRRORS);
goto error;
}

-real_stripes = bbio->num_stripes - bbio->num_tgtdevs;
+real_stripes = bioc->num_stripes - bioc->num_tgtdevs;
for (nzones = 0; nzones < real_stripes; ++nzones) {
struct reada_zone *zone;

-dev = bbio->stripes[nzones].dev;
+dev = bioc->stripes[nzones].dev;

/* cannot read ahead on missing device. */
if (!dev->bdev)
continue;

-zone = reada_find_zone(dev, logical, bbio);
+zone = reada_find_zone(dev, logical, bioc);
if (!zone)
continue;

@@ -464,7 +464,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
if (!have_zone)
goto error;

-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
return re;

error:
@@ -488,7 +488,7 @@ error:
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
kfree(re);
return re_exist;
}
@@ -678,10 +678,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,

if (generic_ref->type == BTRFS_REF_METADATA) {
if (!parent)
-ref_root = generic_ref->tree_ref.root;
+ref_root = generic_ref->tree_ref.owning_root;
owner = generic_ref->tree_ref.level;
} else if (!parent) {
-ref_root = generic_ref->data_ref.ref_root;
+ref_root = generic_ref->data_ref.owning_root;
owner = generic_ref->data_ref.ino;
offset = generic_ref->data_ref.offset;
}
@@ -138,7 +138,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
}

btrfs_page_set_uptodate(fs_info, page, file_offset, block_size);
-ClearPageChecked(page);
+btrfs_page_clear_checked(fs_info, page, file_offset, block_size);
btrfs_page_set_dirty(fs_info, page, file_offset, block_size);
out_unlock:
if (page) {
@@ -649,7 +649,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
struct inode *dst, u64 dst_loff)
{
-int ret;
+int ret = 0;
u64 i, tail_len, chunk_count;
struct btrfs_root *root_dst = BTRFS_I(dst)->root;

@@ -25,6 +25,7 @@
#include "backref.h"
#include "misc.h"
#include "subpage.h"
+#include "zoned.h"

/*
* Relocation overview
@@ -1145,9 +1146,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
key.offset -= btrfs_file_extent_offset(leaf, fi);
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
num_bytes, parent);
-ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
-key.objectid, key.offset);
+key.objectid, key.offset,
+root->root_key.objectid, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1156,9 +1157,9 @@ int replace_file_extents(struct btrfs_trans_handle *trans,

btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
num_bytes, parent);
-ref.real_root = root->root_key.objectid;
btrfs_init_data_ref(&ref, btrfs_header_owner(leaf),
-key.objectid, key.offset);
+key.objectid, key.offset,
+root->root_key.objectid, false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1367,8 +1368,8 @@ again:

btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, old_bytenr,
blocksize, path->nodes[level]->start);
-ref.skip_qgroup = true;
-btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
+btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
+0, true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1376,8 +1377,8 @@ again:
}
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, new_bytenr,
blocksize, 0);
-ref.skip_qgroup = true;
-btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
+btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid, 0,
+true);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1386,8 +1387,8 @@ again:

btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, new_bytenr,
blocksize, path->nodes[level]->start);
-btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid);
-ref.skip_qgroup = true;
+btrfs_init_tree_ref(&ref, level - 1, src->root_key.objectid,
+0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -1396,8 +1397,8 @@ again:

btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, old_bytenr,
blocksize, 0);
-btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid);
-ref.skip_qgroup = true;
+btrfs_init_tree_ref(&ref, level - 1, dest->root_key.objectid,
+0, true);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -2473,9 +2474,9 @@ static int do_relocation(struct btrfs_trans_handle *trans,
btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
node->eb->start, blocksize,
upper->eb->start);
-ref.real_root = root->root_key.objectid;
btrfs_init_tree_ref(&ref, node->level,
-btrfs_header_owner(upper->eb));
+btrfs_header_owner(upper->eb),
+root->root_key.objectid, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (!ret)
ret = btrfs_drop_subtree(trans, root, eb,
@@ -2691,8 +2692,12 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,
list_add_tail(&node->list, &rc->backref_cache.changed);
} else {
path->lowest_level = node->level;
+if (root == root->fs_info->chunk_root)
+btrfs_reserve_chunk_metadata(trans, false);
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
btrfs_release_path(path);
+if (root == root->fs_info->chunk_root)
+btrfs_trans_release_chunk_metadata(trans);
if (ret > 0)
ret = 0;
}
@@ -2852,31 +2857,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
if (ret)
return ret;

-/*
-* On a zoned filesystem, we cannot preallocate the file region.
-* Instead, we dirty and fiemap_write the region.
-*/
-if (btrfs_is_zoned(inode->root->fs_info)) {
-struct btrfs_root *root = inode->root;
-struct btrfs_trans_handle *trans;
-
-end = cluster->end - offset + 1;
-trans = btrfs_start_transaction(root, 1);
-if (IS_ERR(trans))
-return PTR_ERR(trans);
-
-inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
-i_size_write(&inode->vfs_inode, end);
-ret = btrfs_update_inode(trans, root, inode);
-if (ret) {
-btrfs_abort_transaction(trans, ret);
-btrfs_end_transaction(trans);
-return ret;
-}
-
-return btrfs_end_transaction(trans);
-}
-
btrfs_inode_lock(&inode->vfs_inode, 0);
for (nr = 0; nr < cluster->nr; nr++) {
start = cluster->boundary[nr] - offset;
@@ -2903,9 +2883,8 @@ static noinline_for_stack int prealloc_file_extent_cluster(
return ret;
}

-static noinline_for_stack
-int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
-u64 block_start)
+static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
+u64 start, u64 end, u64 block_start)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
@@ -3084,7 +3063,6 @@ release_page:
static int relocate_file_extent_cluster(struct inode *inode,
struct file_extent_cluster *cluster)
{
-struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 offset = BTRFS_I(inode)->index_cnt;
unsigned long index;
unsigned long last_index;
@@ -3105,7 +3083,7 @@ static int relocate_file_extent_cluster(struct inode *inode,

file_ra_state_init(ra, inode->i_mapping);

-ret = setup_extent_mapping(inode, cluster->start - offset,
+ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
cluster->end - offset, cluster->start);
if (ret)
goto out;
@@ -3114,8 +3092,6 @@ static int relocate_file_extent_cluster(struct inode *inode,
for (index = (cluster->start - offset) >> PAGE_SHIFT;
index <= last_index && !ret; index++)
ret = relocate_one_page(inode, ra, cluster, &cluster_nr, index);
-if (btrfs_is_zoned(fs_info) && !ret)
-ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret == 0)
WARN_ON(cluster_nr != cluster->nr);
out:
@@ -3770,12 +3746,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_inode_item *item;
struct extent_buffer *leaf;
-u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
int ret;

-if (btrfs_is_zoned(trans->fs_info))
-flags &= ~BTRFS_INODE_PREALLOC;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -3790,7 +3762,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
btrfs_set_inode_generation(leaf, item, 1);
btrfs_set_inode_size(leaf, item, 0);
btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
-btrfs_set_inode_flags(leaf, item, flags);
+btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
+BTRFS_INODE_PREALLOC);
btrfs_mark_buffer_dirty(leaf);
out:
btrfs_free_path(path);
@@ -4063,6 +4036,9 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->block_group->start,
rc->block_group->length);

+ret = btrfs_zone_finish(rc->block_group);
+WARN_ON(ret && ret != -EAGAIN);
+
while (1) {
int finishes_stage;

@@ -4386,8 +4362,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
if (!rc)
return 0;

-BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
-root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
+BUG_ON(rc->stage == UPDATE_DATA_PTRS && btrfs_is_data_reloc_root(root));

level = btrfs_header_level(buf);
if (btrfs_header_generation(buf) <=
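The relocate_tree_block() change above brackets the chunk-root search with btrfs_reserve_chunk_metadata()/btrfs_trans_release_chunk_metadata(), which is the fix for the deadlock between chunk allocation and chunk btree modifications mentioned in the merge summary. A minimal sketch of that bracketing pattern; the wrapper function is a simplified assumption, not the full relocation path:

/* Hypothetical sketch: reserve chunk metadata around a chunk-root COW search. */
static int search_chunk_root_cow(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 const struct btrfs_key *key,
				 struct btrfs_path *path)
{
	bool is_chunk_root = (root == root->fs_info->chunk_root);
	int ret;

	/* Pre-reserve space for chunk btree modifications before COWing it. */
	if (is_chunk_root)
		btrfs_reserve_chunk_metadata(trans, false);
	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
	btrfs_release_path(path);
	if (is_chunk_root)
		btrfs_trans_release_chunk_metadata(trans);

	return ret > 0 ? 0 : ret;
}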
fs/btrfs/scrub.c
@@ -57,7 +57,7 @@ struct scrub_ctx;

struct scrub_recover {
refcount_t refs;
-struct btrfs_bio *bbio;
+struct btrfs_io_context *bioc;
u64 map_length;
};

@@ -254,7 +254,7 @@ static void scrub_put_ctx(struct scrub_ctx *sctx);
static inline int scrub_is_page_on_raid56(struct scrub_page *spage)
{
return spage->recover &&
-(spage->recover->bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
+(spage->recover->bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK);
}

static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
@@ -798,7 +798,7 @@ static inline void scrub_put_recover(struct btrfs_fs_info *fs_info,
{
if (refcount_dec_and_test(&recover->refs)) {
btrfs_bio_counter_dec(fs_info);
-btrfs_put_bbio(recover->bbio);
+btrfs_put_bioc(recover->bioc);
kfree(recover);
}
}
@@ -1027,8 +1027,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
sblock_other = sblocks_for_recheck + mirror_index;
} else {
struct scrub_recover *r = sblock_bad->pagev[0]->recover;
-int max_allowed = r->bbio->num_stripes -
-r->bbio->num_tgtdevs;
+int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs;

if (mirror_index >= max_allowed)
break;
@@ -1218,14 +1217,14 @@ out:
return 0;
}

-static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio)
+static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
{
-if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5)
+if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
return 2;
-else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6)
+else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
return 3;
else
-return (int)bbio->num_stripes;
+return (int)bioc->num_stripes;
}

static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
@@ -1269,7 +1268,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
u64 flags = original_sblock->pagev[0]->flags;
u64 have_csum = original_sblock->pagev[0]->have_csum;
struct scrub_recover *recover;
-struct btrfs_bio *bbio;
+struct btrfs_io_context *bioc;
u64 sublen;
u64 mapped_length;
u64 stripe_offset;
@@ -1288,7 +1287,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
while (length > 0) {
sublen = min_t(u64, length, fs_info->sectorsize);
mapped_length = sublen;
-bbio = NULL;
+bioc = NULL;

/*
* With a length of sectorsize, each returned stripe represents
@@ -1296,27 +1295,27 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
*/
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
-logical, &mapped_length, &bbio);
-if (ret || !bbio || mapped_length < sublen) {
-btrfs_put_bbio(bbio);
+logical, &mapped_length, &bioc);
+if (ret || !bioc || mapped_length < sublen) {
+btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
return -EIO;
}

recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
if (!recover) {
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
return -ENOMEM;
}

refcount_set(&recover->refs, 1);
-recover->bbio = bbio;
+recover->bioc = bioc;
recover->map_length = mapped_length;

BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);

-nmirrors = min(scrub_nr_raid_mirrors(bbio), BTRFS_MAX_MIRRORS);
+nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);

for (mirror_index = 0; mirror_index < nmirrors;
mirror_index++) {
@@ -1348,17 +1347,17 @@ leave_nomem:
sctx->fs_info->csum_size);

scrub_stripe_index_and_offset(logical,
-bbio->map_type,
-bbio->raid_map,
+bioc->map_type,
+bioc->raid_map,
mapped_length,
-bbio->num_stripes -
-bbio->num_tgtdevs,
+bioc->num_stripes -
+bioc->num_tgtdevs,
mirror_index,
&stripe_index,
&stripe_offset);
-spage->physical = bbio->stripes[stripe_index].physical +
+spage->physical = bioc->stripes[stripe_index].physical +
stripe_offset;
-spage->dev = bbio->stripes[stripe_index].dev;
+spage->dev = bioc->stripes[stripe_index].dev;

BUG_ON(page_index >= original_sblock->page_count);
spage->physical_for_dev_replace =
@@ -1401,7 +1400,7 @@ static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
bio->bi_end_io = scrub_bio_wait_endio;

mirror_num = spage->sblock->pagev[0]->mirror_num;
-ret = raid56_parity_recover(fs_info, bio, spage->recover->bbio,
+ret = raid56_parity_recover(bio, spage->recover->bioc,
spage->recover->map_length,
mirror_num, 0);
if (ret)
@@ -1423,7 +1422,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
if (!first_page->dev->bdev)
goto out;

-bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
+bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio_set_dev(bio, first_page->dev->bdev);

for (page_num = 0; page_num < sblock->page_count; page_num++) {
@@ -1480,7 +1479,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}

WARN_ON(!spage->page);
-bio = btrfs_io_bio_alloc(1);
+bio = btrfs_bio_alloc(1);
bio_set_dev(bio, spage->dev->bdev);

bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
@@ -1562,7 +1561,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}

-bio = btrfs_io_bio_alloc(1);
+bio = btrfs_bio_alloc(1);
bio_set_dev(bio, spage_bad->dev->bdev);
bio->bi_iter.bi_sector = spage_bad->physical >> 9;
bio->bi_opf = REQ_OP_WRITE;
@@ -1676,7 +1675,7 @@ again:
sbio->dev = sctx->wr_tgtdev;
bio = sbio->bio;
if (!bio) {
-bio = btrfs_io_bio_alloc(sctx->pages_per_wr_bio);
+bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
sbio->bio = bio;
}

@@ -2102,7 +2101,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
-bio = btrfs_io_bio_alloc(sctx->pages_per_rd_bio);
+bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
sbio->bio = bio;
}

@@ -2203,7 +2202,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
struct btrfs_fs_info *fs_info = sctx->fs_info;
u64 length = sblock->page_count * PAGE_SIZE;
u64 logical = sblock->pagev[0]->logical;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
struct bio *bio;
struct btrfs_raid_bio *rbio;
int ret;
@@ -2211,27 +2210,27 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)

btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
-&length, &bbio);
-if (ret || !bbio || !bbio->raid_map)
-goto bbio_out;
+&length, &bioc);
+if (ret || !bioc || !bioc->raid_map)
+goto bioc_out;

if (WARN_ON(!sctx->is_dev_replace ||
-!(bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
+!(bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK))) {
/*
* We shouldn't be scrubbing a missing device. Even for dev
* replace, we should only get here for RAID 5/6. We either
* managed to mount something with no mirrors remaining or
* there's a bug in scrub_remap_extent()/btrfs_map_block().
*/
-goto bbio_out;
+goto bioc_out;
}

-bio = btrfs_io_bio_alloc(0);
+bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio->bi_iter.bi_sector = logical >> 9;
bio->bi_private = sblock;
bio->bi_end_io = scrub_missing_raid56_end_io;

-rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
+rbio = raid56_alloc_missing_rbio(bio, bioc, length);
if (!rbio)
goto rbio_out;

@@ -2249,9 +2248,9 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)

rbio_out:
bio_put(bio);
-bbio_out:
+bioc_out:
btrfs_bio_counter_dec(fs_info);
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
spin_unlock(&sctx->stat_lock);
@@ -2826,7 +2825,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct bio *bio;
struct btrfs_raid_bio *rbio;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
u64 length;
int ret;

@@ -2838,17 +2837,17 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)

btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start,
-&length, &bbio);
-if (ret || !bbio || !bbio->raid_map)
-goto bbio_out;
+&length, &bioc);
+if (ret || !bioc || !bioc->raid_map)
+goto bioc_out;

-bio = btrfs_io_bio_alloc(0);
+bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio->bi_iter.bi_sector = sparity->logic_start >> 9;
bio->bi_private = sparity;
bio->bi_end_io = scrub_parity_bio_endio;

-rbio = raid56_parity_alloc_scrub_rbio(fs_info, bio, bbio,
-length, sparity->scrub_dev,
+rbio = raid56_parity_alloc_scrub_rbio(bio, bioc, length,
+sparity->scrub_dev,
sparity->dbitmap,
sparity->nsectors);
if (!rbio)
@@ -2860,9 +2859,9 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)

rbio_out:
bio_put(bio);
-bbio_out:
+bioc_out:
btrfs_bio_counter_dec(fs_info);
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
sparity->nsectors);
spin_lock(&sctx->stat_lock);
@@ -2901,7 +2900,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_root *csum_root = fs_info->csum_root;
struct btrfs_extent_item *extent;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
u64 flags;
int ret;
int slot;
@@ -3044,22 +3043,22 @@ again:
extent_len);

mapped_length = extent_len;
-bbio = NULL;
+bioc = NULL;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
-extent_logical, &mapped_length, &bbio,
+extent_logical, &mapped_length, &bioc,
0);
if (!ret) {
-if (!bbio || mapped_length < extent_len)
+if (!bioc || mapped_length < extent_len)
ret = -EIO;
}
if (ret) {
-btrfs_put_bbio(bbio);
+btrfs_put_bioc(bioc);
goto out;
}
-extent_physical = bbio->stripes[0].physical;
-extent_mirror_num = bbio->mirror_num;
-extent_dev = bbio->stripes[0].dev;
-btrfs_put_bbio(bbio);
+extent_physical = bioc->stripes[0].physical;
+extent_mirror_num = bioc->mirror_num;
+extent_dev = bioc->stripes[0].dev;
+btrfs_put_bioc(bioc);

ret = btrfs_lookup_csums_range(csum_root,
extent_logical,
@@ -3956,7 +3955,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
int ret;
struct btrfs_fs_info *fs_info = sctx->fs_info;

-if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+if (BTRFS_FS_ERROR(fs_info))
return -EROFS;

/* Seed devices of a new filesystem has their own generation. */
@@ -4068,6 +4067,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
u64 end, struct btrfs_scrub_progress *progress,
int readonly, int is_dev_replace)
{
+struct btrfs_dev_lookup_args args = { .devid = devid };
struct scrub_ctx *sctx;
int ret;
struct btrfs_device *dev;
@@ -4115,7 +4115,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
goto out_free_ctx;

mutex_lock(&fs_info->fs_devices->device_list_mutex);
-dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
+dev = btrfs_find_device(fs_info->fs_devices, &args);
if (!dev || (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) &&
!is_dev_replace)) {
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
@@ -4288,11 +4288,12 @@ int btrfs_scrub_cancel_dev(struct btrfs_device *dev)
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress)
{
+struct btrfs_dev_lookup_args args = { .devid = devid };
struct btrfs_device *dev;
struct scrub_ctx *sctx = NULL;

mutex_lock(&fs_info->fs_devices->device_list_mutex);
-dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
+dev = btrfs_find_device(fs_info->fs_devices, &args);
if (dev)
sctx = dev->scrub_ctx;
if (sctx)
@@ -4309,20 +4310,20 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info,
int *extent_mirror_num)
{
u64 mapped_length;
-struct btrfs_bio *bbio = NULL;
+struct btrfs_io_context *bioc = NULL;
int ret;

mapped_length = extent_len;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, extent_logical,
-&mapped_length, &bbio, 0);
-if (ret || !bbio || mapped_length < extent_len ||
-!bbio->stripes[0].dev->bdev) {
-btrfs_put_bbio(bbio);
+&mapped_length, &bioc, 0);
+if (ret || !bioc || mapped_length < extent_len ||
+!bioc->stripes[0].dev->bdev) {
+btrfs_put_bioc(bioc);
return;
}

-*extent_physical = bbio->stripes[0].physical;
-*extent_mirror_num = bbio->mirror_num;
-*extent_dev = bbio->stripes[0].dev;
-btrfs_put_bbio(bbio);
+*extent_physical = bioc->stripes[0].physical;
+*extent_mirror_num = bioc->mirror_num;
+*extent_dev = bioc->stripes[0].dev;
+btrfs_put_bioc(bioc);
}
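The scrub hunks above switch btrfs_find_device() to a btrfs_dev_lookup_args descriptor instead of separate devid/uuid/fsid arguments. A minimal sketch of the lookup-by-devid pattern used there; the wrapper function itself is an assumption for illustration, while the locking mirrors the surrounding code:

/* Hypothetical sketch: resolve a devid to a btrfs_device with the args API. */
static struct btrfs_device *lookup_dev_by_devid(struct btrfs_fs_info *fs_info,
						u64 devid)
{
	struct btrfs_dev_lookup_args args = { .devid = devid };
	struct btrfs_device *dev;

	/* The device list is protected by device_list_mutex during lookup. */
	mutex_lock(&fs_info->fs_devices->device_list_mutex);
	dev = btrfs_find_device(fs_info->fs_devices, &args);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);

	return dev;
}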
@@ -84,6 +84,8 @@ struct send_ctx {
u64 total_send_size;
u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
+/* Protocol version compatibility requested */
+u32 proto;

struct btrfs_root *send_root;
struct btrfs_root *parent_root;
@@ -312,6 +314,16 @@ static void inconsistent_snapshot_error(struct send_ctx *sctx,
sctx->parent_root->root_key.objectid : 0));
}

+__maybe_unused
+static bool proto_cmd_ok(const struct send_ctx *sctx, int cmd)
+{
+switch (sctx->proto) {
+case 1: return cmd < __BTRFS_SEND_C_MAX_V1;
+case 2: return cmd < __BTRFS_SEND_C_MAX_V2;
+default: return false;
+}
+}
+
static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);

static struct waiting_dir_move *
@@ -2720,19 +2732,12 @@ static int send_create_inode_if_needed(struct send_ctx *sctx)
if (S_ISDIR(sctx->cur_inode_mode)) {
ret = did_create_dir(sctx, sctx->cur_ino);
if (ret < 0)
-goto out;
-if (ret) {
-ret = 0;
-goto out;
-}
-}
-
-ret = send_create_inode(sctx, sctx->cur_ino);
-if (ret < 0)
-goto out;
-
-out:
-return ret;
+return ret;
+else if (ret > 0)
+return 0;
+}
+
+return send_create_inode(sctx, sctx->cur_ino);
}

struct recorded_ref {
@@ -7276,6 +7281,17 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)

sctx->flags = arg->flags;

+if (arg->flags & BTRFS_SEND_FLAG_VERSION) {
+if (arg->version > BTRFS_SEND_STREAM_VERSION) {
+ret = -EPROTO;
+goto out;
+}
+/* Zero means "use the highest version" */
+sctx->proto = arg->version ?: BTRFS_SEND_STREAM_VERSION;
+} else {
+sctx->proto = 1;
+}
+
sctx->send_filp = fget(arg->send_fd);
if (!sctx->send_filp) {
ret = -EBADF;
@@ -48,6 +48,7 @@ struct btrfs_tlv_header {
 enum btrfs_send_cmd {
 BTRFS_SEND_C_UNSPEC,

+/* Version 1 */
 BTRFS_SEND_C_SUBVOL,
 BTRFS_SEND_C_SNAPSHOT,

@@ -76,6 +77,12 @@ enum btrfs_send_cmd {

 BTRFS_SEND_C_END,
 BTRFS_SEND_C_UPDATE_EXTENT,
+__BTRFS_SEND_C_MAX_V1,

+/* Version 2 */
+__BTRFS_SEND_C_MAX_V2,

+/* End */
 __BTRFS_SEND_C_MAX,
 };
 #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
@@ -885,6 +885,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 {
 struct reserve_ticket *ticket;
 u64 tickets_id = space_info->tickets_id;
+const bool aborted = BTRFS_FS_ERROR(fs_info);

 trace_btrfs_fail_all_tickets(fs_info, space_info);

@@ -898,15 +899,18 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 ticket = list_first_entry(&space_info->tickets,
 struct reserve_ticket, list);

-if (ticket->steal &&
+if (!aborted && ticket->steal &&
 steal_from_global_rsv(fs_info, space_info, ticket))
 return true;

-if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
+if (!aborted && btrfs_test_opt(fs_info, ENOSPC_DEBUG))
 btrfs_info(fs_info, "failing ticket with %llu bytes",
 ticket->bytes);

 remove_ticket(space_info, ticket);
+if (aborted)
+ticket->error = -EIO;
+else
 ticket->error = -ENOSPC;
 wake_up(&ticket->wait);

@@ -916,6 +920,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 * here to see if we can make progress with the next ticket in
 * the list.
 */
+if (!aborted)
 btrfs_try_granting_tickets(fs_info, space_info);
 }
 return (tickets_id != space_info->tickets_id);
@@ -1172,6 +1177,10 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
 spin_unlock(&space_info->lock);
 return;
 }

+/* Something happened, fail everything and bail. */
+if (BTRFS_FS_ERROR(fs_info))
+goto aborted_fs;
 last_tickets_id = space_info->tickets_id;
 spin_unlock(&space_info->lock);
 }
@@ -1202,9 +1211,20 @@ static void btrfs_async_reclaim_data_space(struct work_struct *work)
 } else {
 flush_state = 0;
 }

+/* Something happened, fail everything and bail. */
+if (BTRFS_FS_ERROR(fs_info))
+goto aborted_fs;

 }
 spin_unlock(&space_info->lock);
 }
+return;

+aborted_fs:
+maybe_fail_all_tickets(fs_info, space_info);
+space_info->flush = 0;
+spin_unlock(&space_info->lock);
 }

 void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info)
@@ -63,11 +63,41 @@
 * This means a slightly higher tree locking latency.
 */

+void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize)
+{
+unsigned int cur = 0;
+unsigned int nr_bits;

+ASSERT(IS_ALIGNED(PAGE_SIZE, sectorsize));

+nr_bits = PAGE_SIZE / sectorsize;
+subpage_info->bitmap_nr_bits = nr_bits;

+subpage_info->uptodate_offset = cur;
+cur += nr_bits;

+subpage_info->error_offset = cur;
+cur += nr_bits;

+subpage_info->dirty_offset = cur;
+cur += nr_bits;

+subpage_info->writeback_offset = cur;
+cur += nr_bits;

+subpage_info->ordered_offset = cur;
+cur += nr_bits;

+subpage_info->checked_offset = cur;
+cur += nr_bits;

+subpage_info->total_nr_bits = cur;
+}

 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
 struct page *page, enum btrfs_subpage_type type)
 {
-struct btrfs_subpage *subpage = NULL;
+struct btrfs_subpage *subpage;
-int ret;

 /*
 * We have cases like a dummy extent buffer page, which is not mappped
@@ -75,13 +105,15 @@ int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
 */
 if (page->mapping)
 ASSERT(PageLocked(page));

 /* Either not subpage, or the page already has private attached */
 if (fs_info->sectorsize == PAGE_SIZE || PagePrivate(page))
 return 0;

-ret = btrfs_alloc_subpage(fs_info, &subpage, type);
+subpage = btrfs_alloc_subpage(fs_info, type);
-if (ret < 0)
+if (IS_ERR(subpage))
-return ret;
+return PTR_ERR(subpage);

 attach_page_private(page, subpage);
 return 0;
 }
@@ -100,24 +132,28 @@ void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
 btrfs_free_subpage(subpage);
 }

-int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
+struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
-struct btrfs_subpage **ret,
 enum btrfs_subpage_type type)
 {
-if (fs_info->sectorsize == PAGE_SIZE)
+struct btrfs_subpage *ret;
-return 0;
+unsigned int real_size;

-*ret = kzalloc(sizeof(struct btrfs_subpage), GFP_NOFS);
+ASSERT(fs_info->sectorsize < PAGE_SIZE);
-if (!*ret)
-return -ENOMEM;
+real_size = struct_size(ret, bitmaps,
-spin_lock_init(&(*ret)->lock);
+BITS_TO_LONGS(fs_info->subpage_info->total_nr_bits));
+ret = kzalloc(real_size, GFP_NOFS);
+if (!ret)
+return ERR_PTR(-ENOMEM);

+spin_lock_init(&ret->lock);
 if (type == BTRFS_SUBPAGE_METADATA) {
-atomic_set(&(*ret)->eb_refs, 0);
+atomic_set(&ret->eb_refs, 0);
 } else {
-atomic_set(&(*ret)->readers, 0);
+atomic_set(&ret->readers, 0);
-atomic_set(&(*ret)->writers, 0);
+atomic_set(&ret->writers, 0);
 }
-return 0;
+return ret;
 }

 void btrfs_free_subpage(struct btrfs_subpage *subpage)
@@ -222,6 +258,14 @@ static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
 u32 orig_len = *len;

 *start = max_t(u64, page_offset(page), orig_start);
+/*
+* For certain call sites like btrfs_drop_pages(), we may have pages
+* beyond the target range. In that case, just set @len to 0, subpage
+* helpers can handle @len == 0 without any problem.
+*/
+if (page_offset(page) >= orig_start + orig_len)
+*len = 0;
+else
 *len = min_t(u64, page_offset(page) + PAGE_SIZE,
 orig_start + orig_len) - *start;
 }
@@ -248,6 +292,16 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,

 btrfs_subpage_assert(fs_info, page, start, len);

+/*
+* We have call sites passing @lock_page into
+* extent_clear_unlock_delalloc() for compression path.
+*
+* This @locked_page is locked by plain lock_page(), thus its
+* subpage::writers is 0. Handle them in a special way.
+*/
+if (atomic_read(&subpage->writers) == 0)
+return true;

 ASSERT(atomic_read(&subpage->writers) >= nbits);
 return atomic_sub_and_test(nbits, &subpage->writers);
 }
@@ -289,37 +343,59 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
 unlock_page(page);
 }

-/*
+static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start,
-* Convert the [start, start + len) range into a u16 bitmap
+unsigned int nbits)
-*
-* For example: if start == page_offset() + 16K, len = 16K, we get 0x00f0.
-*/
-static u16 btrfs_subpage_calc_bitmap(const struct btrfs_fs_info *fs_info,
-struct page *page, u64 start, u32 len)
 {
-const int bit_start = offset_in_page(start) >> fs_info->sectorsize_bits;
+unsigned int found_zero;
-const int nbits = len >> fs_info->sectorsize_bits;

-btrfs_subpage_assert(fs_info, page, start, len);
+found_zero = find_next_zero_bit(addr, start + nbits, start);
+if (found_zero == start + nbits)
-/*
+return true;
-* Here nbits can be 16, thus can go beyond u16 range. We make the
+return false;
-* first left shift to be calculate in unsigned long (at least u32),
-* then truncate the result to u16.
-*/
-return (u16)(((1UL << nbits) - 1) << bit_start);
 }

+static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start,
+unsigned int nbits)
+{
+unsigned int found_set;

+found_set = find_next_bit(addr, start + nbits, start);
+if (found_set == start + nbits)
+return true;
+return false;
+}

+#define subpage_calc_start_bit(fs_info, page, name, start, len) \
+({ \
+unsigned int start_bit; \
+\
+btrfs_subpage_assert(fs_info, page, start, len); \
+start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
+start_bit += fs_info->subpage_info->name##_offset; \
+start_bit; \
+})

+#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
+bitmap_test_range_all_set(subpage->bitmaps, \
+fs_info->subpage_info->name##_offset, \
+fs_info->subpage_info->bitmap_nr_bits)

+#define subpage_test_bitmap_all_zero(fs_info, subpage, name) \
+bitmap_test_range_all_zero(subpage->bitmaps, \
+fs_info->subpage_info->name##_offset, \
+fs_info->subpage_info->bitmap_nr_bits)

 void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+uptodate, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->uptodate_bitmap |= tmp;
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-if (subpage->uptodate_bitmap == U16_MAX)
+if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
 SetPageUptodate(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -328,11 +404,12 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+uptodate, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->uptodate_bitmap &= ~tmp;
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
 ClearPageUptodate(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -341,11 +418,12 @@ void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+error, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->error_bitmap |= tmp;
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
 SetPageError(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -354,12 +432,13 @@ void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+error, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->error_bitmap &= ~tmp;
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-if (subpage->error_bitmap == 0)
+if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
 ClearPageError(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -368,11 +447,12 @@ void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+dirty, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->dirty_bitmap |= tmp;
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
 spin_unlock_irqrestore(&subpage->lock, flags);
 set_page_dirty(page);
 }
@@ -391,13 +471,14 @@ bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+dirty, start, len);
 unsigned long flags;
 bool last = false;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->dirty_bitmap &= ~tmp;
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-if (subpage->dirty_bitmap == 0)
+if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
 last = true;
 spin_unlock_irqrestore(&subpage->lock, flags);
 return last;
@@ -417,11 +498,12 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+writeback, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->writeback_bitmap |= tmp;
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
 set_page_writeback(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -430,12 +512,13 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+writeback, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->writeback_bitmap &= ~tmp;
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-if (subpage->writeback_bitmap == 0) {
+if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
 ASSERT(PageWriteback(page));
 end_page_writeback(page);
 }
@@ -446,11 +529,12 @@ void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+ordered, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->ordered_bitmap |= tmp;
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
 SetPageOrdered(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }
@@ -459,15 +543,46 @@ void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len)
 {
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+ordered, start, len);
 unsigned long flags;

 spin_lock_irqsave(&subpage->lock, flags);
-subpage->ordered_bitmap &= ~tmp;
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
-if (subpage->ordered_bitmap == 0)
+if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
 ClearPageOrdered(page);
 spin_unlock_irqrestore(&subpage->lock, flags);
 }

+void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
+struct page *page, u64 start, u32 len)
+{
+struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+checked, start, len);
+unsigned long flags;

+spin_lock_irqsave(&subpage->lock, flags);
+bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
+SetPageChecked(page);
+spin_unlock_irqrestore(&subpage->lock, flags);
+}

+void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
+struct page *page, u64 start, u32 len)
+{
+struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
+checked, start, len);
+unsigned long flags;

+spin_lock_irqsave(&subpage->lock, flags);
+bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
+ClearPageChecked(page);
+spin_unlock_irqrestore(&subpage->lock, flags);
+}

 /*
 * Unlike set/clear which is dependent on each page status, for test all bits
 * are tested in the same way.
@@ -477,12 +592,14 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
 struct page *page, u64 start, u32 len) \
 { \
 struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private; \
-const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len); \
+unsigned int start_bit = subpage_calc_start_bit(fs_info, page, \
+name, start, len); \
 unsigned long flags; \
 bool ret; \
 \
 spin_lock_irqsave(&subpage->lock, flags); \
-ret = ((subpage->name##_bitmap & tmp) == tmp); \
+ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit, \
+len >> fs_info->sectorsize_bits); \
 spin_unlock_irqrestore(&subpage->lock, flags); \
 return ret; \
 }
@@ -491,6 +608,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
+IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);

 /*
 * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
@@ -561,6 +679,7 @@ IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
 PageWriteback);
 IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
 PageOrdered);
+IMPLEMENT_BTRFS_PAGE_OPS(checked, SetPageChecked, ClearPageChecked, PageChecked);

 /*
 * Make sure not only the page dirty bit is cleared, but also subpage dirty bit
@@ -579,5 +698,48 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
 return;

 ASSERT(PagePrivate(page) && page->private);
-ASSERT(subpage->dirty_bitmap == 0);
+ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
+}

+/*
+* Handle different locked pages with different page sizes:
+*
+* - Page locked by plain lock_page()
+* It should not have any subpage::writers count.
+* Can be unlocked by unlock_page().
+* This is the most common locked page for __extent_writepage() called
+* inside extent_write_cache_pages() or extent_write_full_page().
+* Rarer cases include the @locked_page from extent_write_locked_range().
+*
+* - Page locked by lock_delalloc_pages()
+* There is only one caller, all pages except @locked_page for
+* extent_write_locked_range().
+* In this case, we have to call subpage helper to handle the case.
+*/
+void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
+u64 start, u32 len)
+{
+struct btrfs_subpage *subpage;

+ASSERT(PageLocked(page));
+/* For regular page size case, we just unlock the page */
+if (fs_info->sectorsize == PAGE_SIZE)
+return unlock_page(page);

+ASSERT(PagePrivate(page) && page->private);
+subpage = (struct btrfs_subpage *)page->private;

+/*
+* For subpage case, there are two types of locked page. With or
+* without writers number.
+*
+* Since we own the page lock, no one else could touch subpage::writers
+* and we are safe to do several atomic operations without spinlock.
+*/
+if (atomic_read(&subpage->writers))
+/* No writers, locked by plain lock_page() */
+return unlock_page(page);

+/* Have writers, use proper subpage helper to end it */
+btrfs_page_end_writer_lock(fs_info, page, start, len);
 }
@@ -6,10 +6,38 @@
 #include <linux/spinlock.h>

 /*
-* Maximum page size we support is 64K, minimum sector size is 4K, u16 bitmap
+* Extra info for subpapge bitmap.
-* is sufficient. Regular bitmap_* is not used due to size reasons.
+*
+* For subpage we pack all uptodate/error/dirty/writeback/ordered bitmaps into
+* one larger bitmap.
+*
+* This structure records how they are organized in the bitmap:
+*
+* /- uptodate_offset /- error_offset /- dirty_offset
+* | | |
+* v v v
+* |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o|
+* |<- bitmap_nr_bits ->|
+* |<--------------- total_nr_bits ---------------->|
 */
-#define BTRFS_SUBPAGE_BITMAP_SIZE 16
+struct btrfs_subpage_info {
+/* Number of bits for each bitmap */
+unsigned int bitmap_nr_bits;

+/* Total number of bits for the whole bitmap */
+unsigned int total_nr_bits;

+/*
+* *_start indicates where the bitmap starts, the length is always
+* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
+*/
+unsigned int uptodate_offset;
+unsigned int error_offset;
+unsigned int dirty_offset;
+unsigned int writeback_offset;
+unsigned int ordered_offset;
+unsigned int checked_offset;
+};

 /*
 * Structure to trace status of each sector inside a page, attached to
@@ -18,10 +46,6 @@
 struct btrfs_subpage {
 /* Common members for both data and metadata pages */
 spinlock_t lock;
-u16 uptodate_bitmap;
-u16 error_bitmap;
-u16 dirty_bitmap;
-u16 writeback_bitmap;
 /*
 * Both data and metadata needs to track how many readers are for the
 * page.
@@ -38,14 +62,11 @@ struct btrfs_subpage {
 * manages whether the subpage can be detached.
 */
 atomic_t eb_refs;
-/* Structures only used by data */
-struct {
-atomic_t writers;

-/* Tracke pending ordered extent in this sector */
+/* Structures only used by data */
-u16 ordered_bitmap;
+atomic_t writers;
-};
 };
+unsigned long bitmaps[];
 };

 enum btrfs_subpage_type {
@@ -53,14 +74,14 @@ enum btrfs_subpage_type {
 BTRFS_SUBPAGE_DATA,
 };

+void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sectorsize);
 int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
 struct page *page, enum btrfs_subpage_type type);
 void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info,
 struct page *page);

 /* Allocate additional data where page represents more than one sector */
-int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
+struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
-struct btrfs_subpage **ret,
 enum btrfs_subpage_type type);
 void btrfs_free_subpage(struct btrfs_subpage *subpage);

@@ -122,11 +143,14 @@ DECLARE_BTRFS_SUBPAGE_OPS(error);
 DECLARE_BTRFS_SUBPAGE_OPS(dirty);
 DECLARE_BTRFS_SUBPAGE_OPS(writeback);
 DECLARE_BTRFS_SUBPAGE_OPS(ordered);
+DECLARE_BTRFS_SUBPAGE_OPS(checked);

 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 struct page *page, u64 start, u32 len);

 void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
 struct page *page);
+void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
+u64 start, u32 len);

 #endif
@@ -1705,7 +1705,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
 goto error_close_devices;
 }

-bdev = fs_devices->latest_bdev;
+bdev = fs_devices->latest_dev->bdev;
 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
 fs_info);
 if (IS_ERR(s)) {
@@ -2006,7 +2006,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 if (ret)
 goto restore;
 } else {
-if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+if (BTRFS_FS_ERROR(fs_info)) {
 btrfs_err(fs_info,
 "Remounting read-write after error is not allowed");
 ret = -EINVAL;
@@ -2463,30 +2463,16 @@ static int btrfs_unfreeze(struct super_block *sb)
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
-struct btrfs_device *dev, *first_dev = NULL;

 /*
-* Lightweight locking of the devices. We should not need
+* There should be always a valid pointer in latest_dev, it may be stale
-* device_list_mutex here as we only read the device data and the list
+* for a short moment in case it's being deleted but still valid until
-* is protected by RCU. Even if a device is deleted during the list
+* the end of RCU grace period.
-* traversals, we'll get valid data, the freeing callback will wait at
-* least until the rcu_read_unlock.
 */
 rcu_read_lock();
-list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices, dev_list) {
+seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\");
-if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
-continue;
-if (!dev->name)
-continue;
-if (!first_dev || dev->devid < first_dev->devid)
-first_dev = dev;
-}

-if (first_dev)
-seq_escape(m, rcu_str_deref(first_dev->name), " \t\n\\");
-else
-WARN_ON(1);
 rcu_read_unlock();

 return 0;
 }

@@ -177,7 +177,7 @@ static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
 } else
 val = can_modify_feature(fa);

-return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+return sysfs_emit(buf, "%d\n", val);
 }

 static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
@@ -330,7 +330,7 @@ static const struct attribute_group btrfs_feature_attr_group = {
 static ssize_t rmdir_subvol_show(struct kobject *kobj,
 struct kobj_attribute *ka, char *buf)
 {
-return scnprintf(buf, PAGE_SIZE, "0\n");
+return sysfs_emit(buf, "0\n");
 }
 BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);

@@ -345,12 +345,12 @@ static ssize_t supported_checksums_show(struct kobject *kobj,
 * This "trick" only works as long as 'enum btrfs_csum_type' has
 * no holes in it
 */
-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ret += sysfs_emit_at(buf, ret, "%s%s", (i == 0 ? "" : " "),
-(i == 0 ? "" : " "), btrfs_super_csum_name(i));
+btrfs_super_csum_name(i));

 }

-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ret += sysfs_emit_at(buf, ret, "\n");
 return ret;
 }
 BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
@@ -358,7 +358,7 @@ BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
 static ssize_t send_stream_version_show(struct kobject *kobj,
 struct kobj_attribute *ka, char *buf)
 {
-return snprintf(buf, PAGE_SIZE, "%d\n", BTRFS_SEND_STREAM_VERSION);
+return sysfs_emit(buf, "%d\n", BTRFS_SEND_STREAM_VERSION);
 }
 BTRFS_ATTR(static_feature, send_stream_version, send_stream_version_show);

@@ -378,9 +378,8 @@ static ssize_t supported_rescue_options_show(struct kobject *kobj,
 int i;

 for (i = 0; i < ARRAY_SIZE(rescue_opts); i++)
-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ret += sysfs_emit_at(buf, ret, "%s%s", (i ? " " : ""), rescue_opts[i]);
-(i ? " " : ""), rescue_opts[i]);
+ret += sysfs_emit_at(buf, ret, "\n");
-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
 return ret;
 }
 BTRFS_ATTR(static_feature, supported_rescue_options,
@@ -394,10 +393,10 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,

 /* 4K sector size is also supported with 64K page size */
 if (PAGE_SIZE == SZ_64K)
-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%u ", SZ_4K);
+ret += sysfs_emit_at(buf, ret, "%u ", SZ_4K);

 /* Only sectorsize == PAGE_SIZE is now supported */
-ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%lu\n", PAGE_SIZE);
+ret += sysfs_emit_at(buf, ret, "%lu\n", PAGE_SIZE);

 return ret;
 }
@@ -437,7 +436,7 @@ static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%lld\n",
+return sysfs_emit(buf, "%lld\n",
 atomic64_read(&fs_info->discard_ctl.discardable_bytes));
 }
 BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
@@ -448,7 +447,7 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%d\n",
+return sysfs_emit(buf, "%d\n",
 atomic_read(&fs_info->discard_ctl.discardable_extents));
 }
 BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
@@ -459,7 +458,7 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%llu\n",
+return sysfs_emit(buf, "%llu\n",
 fs_info->discard_ctl.discard_bitmap_bytes);
 }
 BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
@@ -470,7 +469,7 @@ static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%lld\n",
+return sysfs_emit(buf, "%lld\n",
 atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
 }
 BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
@@ -481,7 +480,7 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%llu\n",
+return sysfs_emit(buf, "%llu\n",
 fs_info->discard_ctl.discard_extent_bytes);
 }
 BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
@@ -492,7 +491,7 @@ static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%u\n",
+return sysfs_emit(buf, "%u\n",
 READ_ONCE(fs_info->discard_ctl.iops_limit));
 }

@@ -523,7 +522,7 @@ static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%u\n",
+return sysfs_emit(buf, "%u\n",
 READ_ONCE(fs_info->discard_ctl.kbps_limit));
 }

@@ -553,7 +552,7 @@ static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%llu\n",
+return sysfs_emit(buf, "%llu\n",
 READ_ONCE(fs_info->discard_ctl.max_discard_size));
 }

@@ -627,7 +626,7 @@ static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
 val = *value_ptr;
 if (lock)
 spin_unlock(lock);
-return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+return sysfs_emit(buf, "%llu\n", val);
 }

 static ssize_t global_rsv_size_show(struct kobject *kobj,
@@ -673,7 +672,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
 val += block_group->used;
 }
 up_read(&sinfo->groups_sem);
-return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+return sysfs_emit(buf, "%llu\n", val);
 }

 /*
@@ -771,7 +770,7 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
 ssize_t ret;

 spin_lock(&fs_info->super_lock);
-ret = scnprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
+ret = sysfs_emit(buf, label[0] ? "%s\n" : "%s", label);
 spin_unlock(&fs_info->super_lock);

 return ret;
@@ -819,7 +818,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
+return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
 }

 BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -829,8 +828,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%u\n",
+return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
-fs_info->super_copy->sectorsize);
 }

 BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -840,7 +838,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
+return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
 }

 BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
@@ -852,7 +850,7 @@ static ssize_t quota_override_show(struct kobject *kobj,
 int quota_override;

 quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
-return scnprintf(buf, PAGE_SIZE, "%d\n", quota_override);
+return sysfs_emit(buf, "%d\n", quota_override);
 }

 static ssize_t quota_override_store(struct kobject *kobj,
@@ -890,8 +888,7 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%pU\n",
+return sysfs_emit(buf, "%pU\n", fs_info->fs_devices->metadata_uuid);
-fs_info->fs_devices->metadata_uuid);
 }

 BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show);
@@ -902,7 +899,7 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);

-return scnprintf(buf, PAGE_SIZE, "%s (%s)\n",
+return sysfs_emit(buf, "%s (%s)\n",
 btrfs_super_csum_name(csum_type),
 crypto_shash_driver_name(fs_info->csum_shash));
 }
@@ -941,7 +938,7 @@ static ssize_t btrfs_exclusive_operation_show(struct kobject *kobj,
 str = "UNKNOWN\n";
 break;
 }
-return scnprintf(buf, PAGE_SIZE, "%s", str);
+return sysfs_emit(buf, "%s", str);
 }
 BTRFS_ATTR(, exclusive_operation, btrfs_exclusive_operation_show);

@@ -950,7 +947,7 @@ static ssize_t btrfs_generation_show(struct kobject *kobj,
 {
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);

-return scnprintf(buf, PAGE_SIZE, "%llu\n", fs_info->generation);
+return sysfs_emit(buf, "%llu\n", fs_info->generation);
 }
 BTRFS_ATTR(, generation, btrfs_generation_show);

@@ -1028,8 +1025,7 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
 struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 ssize_t ret;

-ret = scnprintf(buf, PAGE_SIZE, "%d\n",
+ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
-READ_ONCE(fs_info->bg_reclaim_threshold));

 return ret;
 }
@@ -1471,7 +1467,7 @@ static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,

 val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);

-return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+return sysfs_emit(buf, "%d\n", val);
 }
 BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);

@@ -1484,7 +1480,7 @@ static ssize_t btrfs_devinfo_missing_show(struct kobject *kobj,

 val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);

-return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+return sysfs_emit(buf, "%d\n", val);
 }
 BTRFS_ATTR(devid, missing, btrfs_devinfo_missing_show);

@@ -1498,7 +1494,7 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,

 val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);

-return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+return sysfs_emit(buf, "%d\n", val);
 }
 BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);

@@ -1509,8 +1505,7 @@ static ssize_t btrfs_devinfo_scrub_speed_max_show(struct kobject *kobj,
 struct btrfs_device *device = container_of(kobj, struct btrfs_device,
 devid_kobj);

-return scnprintf(buf, PAGE_SIZE, "%llu\n",
+return sysfs_emit(buf, "%llu\n", READ_ONCE(device->scrub_speed_max));
-READ_ONCE(device->scrub_speed_max));
 }

 static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj,
@@ -1538,7 +1533,7 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,

 val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);

-return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+return sysfs_emit(buf, "%d\n", val);
 }
 BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);

@@ -1549,14 +1544,14 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
 devid_kobj);

 if (!device->dev_stats_valid)
-return scnprintf(buf, PAGE_SIZE, "invalid\n");
+return sysfs_emit(buf, "invalid\n");

 /*
 * Print all at once so we get a snapshot of all values from the same
 * time. Keep them in sync and in order of definition of
 * btrfs_dev_stat_values.
 */
-return scnprintf(buf, PAGE_SIZE,
+return sysfs_emit(buf,
 "write_errs %d\n"
 "read_errs %d\n"
 "flush_errs %d\n"
@@ -60,7 +60,7 @@ static int test_btrfs_split_item(u32 sectorsize, u32 nodesize)
|
|||||||
key.type = BTRFS_EXTENT_CSUM_KEY;
|
key.type = BTRFS_EXTENT_CSUM_KEY;
|
||||||
key.offset = 0;
|
key.offset = 0;
|
||||||
|
|
||||||
setup_items_for_insert(root, path, &key, &value_len, 1);
|
btrfs_setup_item_for_insert(root, path, &key, value_len);
|
||||||
item = btrfs_item_nr(0);
|
item = btrfs_item_nr(0);
|
||||||
write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
|
write_extent_buffer(eb, value, btrfs_item_ptr_offset(eb, 0),
|
||||||
value_len);
|
value_len);
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ static int test_find_delalloc(u32 sectorsize)
|
|||||||
*/
|
*/
|
||||||
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
|
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
|
||||||
start = 0;
|
start = 0;
|
||||||
end = 0;
|
end = start + PAGE_SIZE - 1;
|
||||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||||
&end);
|
&end);
|
||||||
if (!found) {
|
if (!found) {
|
||||||
@@ -143,7 +143,7 @@ static int test_find_delalloc(u32 sectorsize)
|
|||||||
}
|
}
|
||||||
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
|
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
|
||||||
start = test_start;
|
start = test_start;
|
||||||
end = 0;
|
end = start + PAGE_SIZE - 1;
|
||||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||||
&end);
|
&end);
|
||||||
if (!found) {
|
if (!found) {
|
||||||
@@ -177,14 +177,14 @@ static int test_find_delalloc(u32 sectorsize)
|
|||||||
goto out_bits;
|
goto out_bits;
|
||||||
}
|
}
|
||||||
start = test_start;
|
start = test_start;
|
||||||
end = 0;
|
end = start + PAGE_SIZE - 1;
|
||||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||||
&end);
|
&end);
|
||||||
if (found) {
|
if (found) {
|
||||||
test_err("found range when we shouldn't have");
|
test_err("found range when we shouldn't have");
|
||||||
goto out_bits;
|
goto out_bits;
|
||||||
}
|
}
|
||||||
if (end != (u64)-1) {
|
if (end != test_start + PAGE_SIZE - 1) {
|
||||||
test_err("did not return the proper end offset");
|
test_err("did not return the proper end offset");
|
||||||
goto out_bits;
|
goto out_bits;
|
||||||
}
|
}
|
||||||
@@ -198,7 +198,7 @@ static int test_find_delalloc(u32 sectorsize)
|
|||||||
*/
|
*/
|
||||||
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
|
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
|
||||||
start = test_start;
|
start = test_start;
|
||||||
end = 0;
|
end = start + PAGE_SIZE - 1;
|
||||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||||
&end);
|
&end);
|
||||||
if (!found) {
|
if (!found) {
|
||||||
@@ -233,7 +233,7 @@ static int test_find_delalloc(u32 sectorsize)
|
|||||||
/* We unlocked it in the previous test */
|
/* We unlocked it in the previous test */
|
||||||
lock_page(locked_page);
|
lock_page(locked_page);
|
||||||
start = test_start;
|
start = test_start;
|
||||||
end = 0;
|
end = start + PAGE_SIZE - 1;
|
||||||
/*
|
/*
|
||||||
* Currently if we fail to find dirty pages in the delalloc range we
|
* Currently if we fail to find dirty pages in the delalloc range we
|
||||||
* will adjust max_bytes down to PAGE_SIZE and then re-search. If
|
* will adjust max_bytes down to PAGE_SIZE and then re-search. If
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
|
|||||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||||
key.offset = start;
|
key.offset = start;
|
||||||
|
|
||||||
setup_items_for_insert(root, &path, &key, &value_len, 1);
|
btrfs_setup_item_for_insert(root, &path, &key, value_len);
|
||||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||||
btrfs_set_file_extent_generation(leaf, fi, 1);
|
btrfs_set_file_extent_generation(leaf, fi, 1);
|
||||||
btrfs_set_file_extent_type(leaf, fi, type);
|
btrfs_set_file_extent_type(leaf, fi, type);
|
||||||
@@ -63,7 +63,7 @@ static void insert_inode_item_key(struct btrfs_root *root)
|
|||||||
key.type = BTRFS_INODE_ITEM_KEY;
|
key.type = BTRFS_INODE_ITEM_KEY;
|
||||||
key.offset = 0;
|
key.offset = 0;
|
||||||
|
|
||||||
setup_items_for_insert(root, &path, &key, &value_len, 1);
|
btrfs_setup_item_for_insert(root, &path, &key, value_len);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@@ -283,7 +283,7 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
|
|||||||
spin_lock(&fs_info->trans_lock);
|
spin_lock(&fs_info->trans_lock);
|
||||||
loop:
|
loop:
|
||||||
/* The file system has been taken offline. No new transactions. */
|
/* The file system has been taken offline. No new transactions. */
|
||||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
if (BTRFS_FS_ERROR(fs_info)) {
|
||||||
spin_unlock(&fs_info->trans_lock);
|
spin_unlock(&fs_info->trans_lock);
|
||||||
return -EROFS;
|
return -EROFS;
|
||||||
}
|
}
|
||||||
@@ -331,7 +331,7 @@ loop:
|
|||||||
*/
|
*/
|
||||||
kfree(cur_trans);
|
kfree(cur_trans);
|
||||||
goto loop;
|
goto loop;
|
||||||
} else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
} else if (BTRFS_FS_ERROR(fs_info)) {
|
||||||
spin_unlock(&fs_info->trans_lock);
|
spin_unlock(&fs_info->trans_lock);
|
||||||
kfree(cur_trans);
|
kfree(cur_trans);
|
||||||
return -EROFS;
|
return -EROFS;
|
||||||
@@ -579,7 +579,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
|||||||
bool do_chunk_alloc = false;
|
bool do_chunk_alloc = false;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
|
if (BTRFS_FS_ERROR(fs_info))
|
||||||
return ERR_PTR(-EROFS);
|
return ERR_PTR(-EROFS);
|
||||||
|
|
||||||
if (current->journal_info) {
|
if (current->journal_info) {
|
||||||
@@ -991,8 +991,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
|||||||
if (throttle)
|
if (throttle)
|
||||||
btrfs_run_delayed_iputs(info);
|
btrfs_run_delayed_iputs(info);
|
||||||
|
|
||||||
if (TRANS_ABORTED(trans) ||
|
if (TRANS_ABORTED(trans) || BTRFS_FS_ERROR(info)) {
|
||||||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
|
|
||||||
wake_up_process(info->transaction_kthread);
|
wake_up_process(info->transaction_kthread);
|
||||||
if (TRANS_ABORTED(trans))
|
if (TRANS_ABORTED(trans))
|
||||||
err = trans->aborted;
|
err = trans->aborted;
|
||||||
@@ -2155,7 +2154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
|||||||
* abort to prevent writing a new superblock that reflects a
|
* abort to prevent writing a new superblock that reflects a
|
||||||
* corrupt state (pointing to trees with unwritten nodes/leafs).
|
* corrupt state (pointing to trees with unwritten nodes/leafs).
|
||||||
*/
|
*/
|
||||||
if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
|
if (BTRFS_FS_ERROR(fs_info)) {
|
||||||
ret = -EROFS;
|
ret = -EROFS;
|
||||||
goto cleanup_transaction;
|
goto cleanup_transaction;
|
||||||
}
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
|
|||||||
int log_transid;
|
int log_transid;
|
||||||
bool log_new_dentries;
|
bool log_new_dentries;
|
||||||
bool logging_new_name;
|
bool logging_new_name;
|
||||||
|
/* Tracks the last logged dir item/index key offset. */
|
||||||
|
u64 last_dir_item_offset;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
/* Only used for fast fsyncs. */
|
/* Only used for fast fsyncs. */
|
||||||
@@ -68,11 +70,11 @@ int btrfs_recover_log_trees(struct btrfs_root *tree_root);
|
|||||||
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
|
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
|
||||||
struct dentry *dentry,
|
struct dentry *dentry,
|
||||||
struct btrfs_log_ctx *ctx);
|
struct btrfs_log_ctx *ctx);
|
||||||
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
const char *name, int name_len,
|
const char *name, int name_len,
|
||||||
struct btrfs_inode *dir, u64 index);
|
struct btrfs_inode *dir, u64 index);
|
||||||
int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
const char *name, int name_len,
|
const char *name, int name_len,
|
||||||
struct btrfs_inode *inode, u64 dirid);
|
struct btrfs_inode *inode, u64 dirid);
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -236,17 +236,40 @@ struct btrfs_fs_devices {
|
|||||||
bool fsid_change;
|
bool fsid_change;
|
||||||
struct list_head fs_list;
|
struct list_head fs_list;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of devices under this fsid including missing and
|
||||||
|
* replace-target device and excludes seed devices.
|
||||||
|
*/
|
||||||
u64 num_devices;
|
u64 num_devices;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The number of devices that successfully opened, including
|
||||||
|
* replace-target, excludes seed devices.
|
||||||
|
*/
|
||||||
u64 open_devices;
|
u64 open_devices;
|
||||||
|
|
||||||
|
/* The number of devices that are under the chunk allocation list. */
|
||||||
u64 rw_devices;
|
u64 rw_devices;
|
||||||
|
|
||||||
|
/* Count of missing devices under this fsid excluding seed device. */
|
||||||
u64 missing_devices;
|
u64 missing_devices;
|
||||||
u64 total_rw_bytes;
|
u64 total_rw_bytes;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Count of devices from btrfs_super_block::num_devices for this fsid,
|
||||||
|
* which includes the seed device, excludes the transient replace-target
|
||||||
|
* device.
|
||||||
|
*/
|
||||||
u64 total_devices;
|
u64 total_devices;
|
||||||
|
|
||||||
/* Highest generation number of seen devices */
|
/* Highest generation number of seen devices */
|
||||||
u64 latest_generation;
|
u64 latest_generation;
|
||||||
|
|
||||||
struct block_device *latest_bdev;
|
/*
|
||||||
|
* The mount device or a device with highest generation after removal
|
||||||
|
* or replace.
|
||||||
|
*/
|
||||||
|
struct btrfs_device *latest_dev;
|
||||||
|
|
||||||
/* all of the devices in the FS, protected by a mutex
|
/* all of the devices in the FS, protected by a mutex
|
||||||
* so we can safely walk it to write out the supers without
|
* so we can safely walk it to write out the supers without
|
||||||
@@ -300,48 +323,62 @@ struct btrfs_fs_devices {
|
|||||||
/ sizeof(struct btrfs_stripe) + 1)
|
/ sizeof(struct btrfs_stripe) + 1)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we need the mirror number and stripe index to be passed around
|
* Additional info to pass along bio.
|
||||||
* the call chain while we are processing end_io (especially errors).
|
*
|
||||||
* Really, what we need is a btrfs_bio structure that has this info
|
* Mostly for btrfs specific features like csum and mirror_num.
|
||||||
* and is properly sized with its stripe array, but we're not there
|
|
||||||
* quite yet. We have our own btrfs bioset, and all of the bios
|
|
||||||
* we allocate are actually btrfs_io_bios. We'll cram as much of
|
|
||||||
* struct btrfs_bio as we can into this over time.
|
|
||||||
*/
|
*/
|
||||||
struct btrfs_io_bio {
|
struct btrfs_bio {
|
||||||
unsigned int mirror_num;
|
unsigned int mirror_num;
|
||||||
|
|
||||||
|
/* @device is for stripe IO submission. */
|
||||||
struct btrfs_device *device;
|
struct btrfs_device *device;
|
||||||
u64 logical;
|
|
||||||
u8 *csum;
|
u8 *csum;
|
||||||
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
||||||
struct bvec_iter iter;
|
struct bvec_iter iter;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This member must come last, bio_alloc_bioset will allocate enough
|
* This member must come last, bio_alloc_bioset will allocate enough
|
||||||
* bytes for entire btrfs_io_bio but relies on bio being last.
|
* bytes for entire btrfs_bio but relies on bio being last.
|
||||||
*/
|
*/
|
||||||
struct bio bio;
|
struct bio bio;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
|
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
|
||||||
{
|
{
|
||||||
return container_of(bio, struct btrfs_io_bio, bio);
|
return container_of(bio, struct btrfs_bio, bio);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void btrfs_io_bio_free_csum(struct btrfs_io_bio *io_bio)
|
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
|
||||||
{
|
{
|
||||||
if (io_bio->csum != io_bio->csum_inline) {
|
if (bbio->csum != bbio->csum_inline) {
|
||||||
kfree(io_bio->csum);
|
kfree(bbio->csum);
|
||||||
io_bio->csum = NULL;
|
bbio->csum = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct btrfs_bio_stripe {
|
struct btrfs_io_stripe {
|
||||||
struct btrfs_device *dev;
|
struct btrfs_device *dev;
|
||||||
u64 physical;
|
u64 physical;
|
||||||
u64 length; /* only used for discard mappings */
|
u64 length; /* only used for discard mappings */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btrfs_bio {
|
/*
|
||||||
|
* Context for IO subsmission for device stripe.
|
||||||
|
*
|
||||||
|
* - Track the unfinished mirrors for mirror based profiles
|
||||||
|
* Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
|
||||||
|
*
|
||||||
|
* - Contain the logical -> physical mapping info
|
||||||
|
* Used by submit_stripe_bio() for mapping logical bio
|
||||||
|
* into physical device address.
|
||||||
|
*
|
||||||
|
* - Contain device replace info
|
||||||
|
* Used by handle_ops_on_dev_replace() to copy logical bios
|
||||||
|
* into the new device.
|
||||||
|
*
|
||||||
|
* - Contain RAID56 full stripe logical bytenrs
|
||||||
|
*/
|
||||||
|
struct btrfs_io_context {
|
||||||
refcount_t refs;
|
refcount_t refs;
|
||||||
atomic_t stripes_pending;
|
atomic_t stripes_pending;
|
||||||
struct btrfs_fs_info *fs_info;
|
struct btrfs_fs_info *fs_info;
|
||||||
@@ -361,7 +398,7 @@ struct btrfs_bio {
|
|||||||
* so raid_map[0] is the start of our full stripe
|
* so raid_map[0] is the start of our full stripe
|
||||||
*/
|
*/
|
||||||
u64 *raid_map;
|
u64 *raid_map;
|
||||||
struct btrfs_bio_stripe stripes[];
|
struct btrfs_io_stripe stripes[];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct btrfs_device_info {
|
struct btrfs_device_info {
|
||||||
@@ -396,11 +433,11 @@ struct map_lookup {
|
|||||||
int num_stripes;
|
int num_stripes;
|
||||||
int sub_stripes;
|
int sub_stripes;
|
||||||
int verified_stripes; /* For mount time dev extent verification */
|
int verified_stripes; /* For mount time dev extent verification */
|
||||||
struct btrfs_bio_stripe stripes[];
|
struct btrfs_io_stripe stripes[];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
|
#define map_lookup_size(n) (sizeof(struct map_lookup) + \
|
||||||
(sizeof(struct btrfs_bio_stripe) * (n)))
|
(sizeof(struct btrfs_io_stripe) * (n)))
|
||||||
|
|
||||||
struct btrfs_balance_args;
|
struct btrfs_balance_args;
|
||||||
struct btrfs_balance_progress;
|
struct btrfs_balance_progress;
|
||||||
@@ -414,6 +451,22 @@ struct btrfs_balance_control {
|
|||||||
struct btrfs_balance_progress stat;
|
struct btrfs_balance_progress stat;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Search for a given device by the set parameters
|
||||||
|
*/
|
||||||
|
struct btrfs_dev_lookup_args {
|
||||||
|
u64 devid;
|
||||||
|
u8 *uuid;
|
||||||
|
u8 *fsid;
|
||||||
|
bool missing;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
|
||||||
|
#define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
|
||||||
|
|
||||||
|
#define BTRFS_DEV_LOOKUP_ARGS(name) \
|
||||||
|
struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT
|
||||||
|
|
||||||
enum btrfs_map_op {
|
enum btrfs_map_op {
|
||||||
BTRFS_MAP_READ,
|
BTRFS_MAP_READ,
|
||||||
BTRFS_MAP_WRITE,
|
BTRFS_MAP_WRITE,
|
||||||
@@ -437,20 +490,20 @@ static inline enum btrfs_map_op btrfs_op(struct bio *bio)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void btrfs_get_bbio(struct btrfs_bio *bbio);
|
void btrfs_get_bioc(struct btrfs_io_context *bioc);
|
||||||
void btrfs_put_bbio(struct btrfs_bio *bbio);
|
void btrfs_put_bioc(struct btrfs_io_context *bioc);
|
||||||
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||||
u64 logical, u64 *length,
|
u64 logical, u64 *length,
|
||||||
struct btrfs_bio **bbio_ret, int mirror_num);
|
struct btrfs_io_context **bioc_ret, int mirror_num);
|
||||||
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||||
u64 logical, u64 *length,
|
u64 logical, u64 *length,
|
||||||
struct btrfs_bio **bbio_ret);
|
struct btrfs_io_context **bioc_ret);
|
||||||
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
|
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
|
||||||
enum btrfs_map_op op, u64 logical,
|
enum btrfs_map_op op, u64 logical,
|
||||||
struct btrfs_io_geometry *io_geom);
|
struct btrfs_io_geometry *io_geom);
|
||||||
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
|
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
|
||||||
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
|
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
|
||||||
struct btrfs_block_group *btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
|
||||||
u64 type);
|
u64 type);
|
||||||
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
|
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
|
||||||
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||||
@@ -467,19 +520,23 @@ void btrfs_assign_next_active_device(struct btrfs_device *device,
|
|||||||
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
|
struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
|
||||||
u64 devid,
|
u64 devid,
|
||||||
const char *devpath);
|
const char *devpath);
|
||||||
|
int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
|
||||||
|
struct btrfs_dev_lookup_args *args,
|
||||||
|
const char *path);
|
||||||
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
|
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
|
||||||
const u64 *devid,
|
const u64 *devid,
|
||||||
const u8 *uuid);
|
const u8 *uuid);
|
||||||
|
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
|
||||||
void btrfs_free_device(struct btrfs_device *device);
|
void btrfs_free_device(struct btrfs_device *device);
|
||||||
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
|
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
|
||||||
const char *device_path, u64 devid,
|
struct btrfs_dev_lookup_args *args,
|
||||||
struct block_device **bdev, fmode_t *mode);
|
struct block_device **bdev, fmode_t *mode);
|
||||||
void __exit btrfs_cleanup_fs_uuids(void);
|
void __exit btrfs_cleanup_fs_uuids(void);
|
||||||
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
|
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
|
||||||
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
int btrfs_grow_device(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_device *device, u64 new_size);
|
struct btrfs_device *device, u64 new_size);
|
||||||
struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
|
struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
|
||||||
u64 devid, u8 *uuid, u8 *fsid);
|
const struct btrfs_dev_lookup_args *args);
|
||||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||||
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
|
int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
|
||||||
int btrfs_balance(struct btrfs_fs_info *fs_info,
|
int btrfs_balance(struct btrfs_fs_info *fs_info,
|
||||||
@@ -493,7 +550,7 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
|||||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||||
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||||
int btrfs_uuid_scan_kthread(void *data);
|
int btrfs_uuid_scan_kthread(void *data);
|
||||||
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||||
u64 *start, u64 *max_avail);
|
u64 *start, u64 *max_avail);
|
||||||
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
|
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
|
||||||
|
|||||||
@@ -138,7 +138,7 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
|
|||||||
* matches our target xattr, so lets check.
|
* matches our target xattr, so lets check.
|
||||||
*/
|
*/
|
||||||
ret = 0;
|
ret = 0;
|
||||||
btrfs_assert_tree_locked(path->nodes[0]);
|
btrfs_assert_tree_write_locked(path->nodes[0]);
|
||||||
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
|
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
|
||||||
if (!di && !(flags & XATTR_REPLACE)) {
|
if (!di && !(flags & XATTR_REPLACE)) {
|
||||||
ret = -ENOSPC;
|
ret = -ENOSPC;
|
||||||
|
|||||||
531
fs/btrfs/zoned.c
531
fs/btrfs/zoned.c
@@ -4,6 +4,7 @@
|
|||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/blkdev.h>
|
#include <linux/blkdev.h>
|
||||||
#include <linux/sched/mm.h>
|
#include <linux/sched/mm.h>
|
||||||
|
#include <linux/atomic.h>
|
||||||
#include "ctree.h"
|
#include "ctree.h"
|
||||||
#include "volumes.h"
|
#include "volumes.h"
|
||||||
#include "zoned.h"
|
#include "zoned.h"
|
||||||
@@ -38,6 +39,16 @@
|
|||||||
/* Number of superblock log zones */
|
/* Number of superblock log zones */
|
||||||
#define BTRFS_NR_SB_LOG_ZONES 2
|
#define BTRFS_NR_SB_LOG_ZONES 2
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minimum of active zones we need:
|
||||||
|
*
|
||||||
|
* - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
|
||||||
|
* - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
|
||||||
|
* - 1 zone for tree-log dedicated block group
|
||||||
|
* - 1 zone for relocation
|
||||||
|
*/
|
||||||
|
#define BTRFS_MIN_ACTIVE_ZONES (BTRFS_SUPER_MIRROR_MAX + 5)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Maximum supported zone size. Currently, SMR disks have a zone size of
|
* Maximum supported zone size. Currently, SMR disks have a zone size of
|
||||||
* 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
|
* 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not
|
||||||
@@ -45,6 +56,14 @@
|
|||||||
*/
|
*/
|
||||||
#define BTRFS_MAX_ZONE_SIZE SZ_8G
|
#define BTRFS_MAX_ZONE_SIZE SZ_8G
|
||||||
|
|
||||||
|
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
|
||||||
|
|
||||||
|
static inline bool sb_zone_is_full(const struct blk_zone *zone)
|
||||||
|
{
|
||||||
|
return (zone->cond == BLK_ZONE_COND_FULL) ||
|
||||||
|
(zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity);
|
||||||
|
}
|
||||||
|
|
||||||
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
|
static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
|
||||||
{
|
{
|
||||||
struct blk_zone *zones = data;
|
struct blk_zone *zones = data;
|
||||||
@@ -60,14 +79,13 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
|||||||
bool empty[BTRFS_NR_SB_LOG_ZONES];
|
bool empty[BTRFS_NR_SB_LOG_ZONES];
|
||||||
bool full[BTRFS_NR_SB_LOG_ZONES];
|
bool full[BTRFS_NR_SB_LOG_ZONES];
|
||||||
sector_t sector;
|
sector_t sector;
|
||||||
|
int i;
|
||||||
|
|
||||||
ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
|
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||||
zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
||||||
|
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
|
||||||
empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY);
|
full[i] = sb_zone_is_full(&zones[i]);
|
||||||
empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY);
|
}
|
||||||
full[0] = (zones[0].cond == BLK_ZONE_COND_FULL);
|
|
||||||
full[1] = (zones[1].cond == BLK_ZONE_COND_FULL);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Possible states of log buffer zones
|
* Possible states of log buffer zones
|
||||||
@@ -296,6 +314,9 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
struct btrfs_fs_info *fs_info = device->fs_info;
|
struct btrfs_fs_info *fs_info = device->fs_info;
|
||||||
struct btrfs_zoned_device_info *zone_info = NULL;
|
struct btrfs_zoned_device_info *zone_info = NULL;
|
||||||
struct block_device *bdev = device->bdev;
|
struct block_device *bdev = device->bdev;
|
||||||
|
struct request_queue *queue = bdev_get_queue(bdev);
|
||||||
|
unsigned int max_active_zones;
|
||||||
|
unsigned int nactive;
|
||||||
sector_t nr_sectors;
|
sector_t nr_sectors;
|
||||||
sector_t sector = 0;
|
sector_t sector = 0;
|
||||||
struct blk_zone *zones = NULL;
|
struct blk_zone *zones = NULL;
|
||||||
@@ -351,6 +372,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
if (!IS_ALIGNED(nr_sectors, zone_sectors))
|
if (!IS_ALIGNED(nr_sectors, zone_sectors))
|
||||||
zone_info->nr_zones++;
|
zone_info->nr_zones++;
|
||||||
|
|
||||||
|
max_active_zones = queue_max_active_zones(queue);
|
||||||
|
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
|
||||||
|
btrfs_err_in_rcu(fs_info,
|
||||||
|
"zoned: %s: max active zones %u is too small, need at least %u active zones",
|
||||||
|
rcu_str_deref(device->name), max_active_zones,
|
||||||
|
BTRFS_MIN_ACTIVE_ZONES);
|
||||||
|
ret = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
zone_info->max_active_zones = max_active_zones;
|
||||||
|
|
||||||
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||||
if (!zone_info->seq_zones) {
|
if (!zone_info->seq_zones) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
@@ -363,6 +395,12 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
|
||||||
|
if (!zone_info->active_zones) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
|
zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
|
||||||
if (!zones) {
|
if (!zones) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
@@ -370,6 +408,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Get zones type */
|
/* Get zones type */
|
||||||
|
nactive = 0;
|
||||||
while (sector < nr_sectors) {
|
while (sector < nr_sectors) {
|
||||||
nr_zones = BTRFS_REPORT_NR_ZONES;
|
nr_zones = BTRFS_REPORT_NR_ZONES;
|
||||||
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
|
ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
|
||||||
@@ -380,8 +419,17 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
for (i = 0; i < nr_zones; i++) {
|
for (i = 0; i < nr_zones; i++) {
|
||||||
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
|
if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
|
||||||
__set_bit(nreported, zone_info->seq_zones);
|
__set_bit(nreported, zone_info->seq_zones);
|
||||||
if (zones[i].cond == BLK_ZONE_COND_EMPTY)
|
switch (zones[i].cond) {
|
||||||
|
case BLK_ZONE_COND_EMPTY:
|
||||||
__set_bit(nreported, zone_info->empty_zones);
|
__set_bit(nreported, zone_info->empty_zones);
|
||||||
|
break;
|
||||||
|
case BLK_ZONE_COND_IMP_OPEN:
|
||||||
|
case BLK_ZONE_COND_EXP_OPEN:
|
||||||
|
case BLK_ZONE_COND_CLOSED:
|
||||||
|
__set_bit(nreported, zone_info->active_zones);
|
||||||
|
nactive++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
nreported++;
|
nreported++;
|
||||||
}
|
}
|
||||||
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
|
sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
|
||||||
@@ -396,6 +444,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (max_active_zones) {
|
||||||
|
if (nactive > max_active_zones) {
|
||||||
|
btrfs_err_in_rcu(device->fs_info,
|
||||||
|
"zoned: %u active zones on %s exceeds max_active_zones %u",
|
||||||
|
nactive, rcu_str_deref(device->name),
|
||||||
|
max_active_zones);
|
||||||
|
ret = -EIO;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
atomic_set(&zone_info->active_zones_left,
|
||||||
|
max_active_zones - nactive);
|
||||||
|
}
|
||||||
|
|
||||||
/* Validate superblock log */
|
/* Validate superblock log */
|
||||||
nr_zones = BTRFS_NR_SB_LOG_ZONES;
|
nr_zones = BTRFS_NR_SB_LOG_ZONES;
|
||||||
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
|
||||||
@@ -478,6 +539,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
|
|||||||
out:
|
out:
|
||||||
kfree(zones);
|
kfree(zones);
|
||||||
out_free_zone_info:
|
out_free_zone_info:
|
||||||
|
bitmap_free(zone_info->active_zones);
|
||||||
bitmap_free(zone_info->empty_zones);
|
bitmap_free(zone_info->empty_zones);
|
||||||
bitmap_free(zone_info->seq_zones);
|
bitmap_free(zone_info->seq_zones);
|
||||||
kfree(zone_info);
|
kfree(zone_info);
|
||||||
@@ -493,6 +555,7 @@ void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
|
|||||||
if (!zone_info)
|
if (!zone_info)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
bitmap_free(zone_info->active_zones);
|
||||||
bitmap_free(zone_info->seq_zones);
|
bitmap_free(zone_info->seq_zones);
|
||||||
bitmap_free(zone_info->empty_zones);
|
bitmap_free(zone_info->empty_zones);
|
||||||
kfree(zone_info);
|
kfree(zone_info);
|
||||||
@@ -585,7 +648,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
|
* stripe_size is always aligned to BTRFS_STRIPE_LEN in
|
||||||
* __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
|
* btrfs_create_chunk(). Since we want stripe_len == zone_size,
|
||||||
* check the alignment here.
|
* check the alignment here.
|
||||||
*/
|
*/
|
||||||
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
|
if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
|
||||||
@@ -664,7 +727,7 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
|
|||||||
reset = &zones[1];
|
reset = &zones[1];
|
||||||
|
|
||||||
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
|
if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
|
||||||
ASSERT(reset->cond == BLK_ZONE_COND_FULL);
|
ASSERT(sb_zone_is_full(reset));
|
||||||
|
|
||||||
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
|
ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
|
||||||
reset->start, reset->len,
|
reset->start, reset->len,
|
||||||
@@ -676,9 +739,20 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
|
|||||||
reset->wp = reset->start;
|
reset->wp = reset->start;
|
||||||
}
|
}
|
||||||
} else if (ret != -ENOENT) {
|
} else if (ret != -ENOENT) {
|
||||||
/* For READ, we want the precious one */
|
/*
|
||||||
|
* For READ, we want the previous one. Move write pointer to
|
||||||
|
* the end of a zone, if it is at the head of a zone.
|
||||||
|
*/
|
||||||
|
u64 zone_end = 0;
|
||||||
|
|
||||||
if (wp == zones[0].start << SECTOR_SHIFT)
|
if (wp == zones[0].start << SECTOR_SHIFT)
|
||||||
wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
|
zone_end = zones[1].start + zones[1].capacity;
|
||||||
|
else if (wp == zones[1].start << SECTOR_SHIFT)
|
||||||
|
zone_end = zones[0].start + zones[0].capacity;
|
||||||
|
if (zone_end)
|
||||||
|
wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT,
|
||||||
|
BTRFS_SUPER_INFO_SIZE);
|
||||||
|
|
||||||
wp -= BTRFS_SUPER_INFO_SIZE;
|
wp -= BTRFS_SUPER_INFO_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -771,36 +845,56 @@ static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
|
||||||
{
|
{
|
||||||
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
||||||
struct blk_zone *zone;
|
struct blk_zone *zone;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (!is_sb_log_zone(zinfo, mirror))
|
if (!is_sb_log_zone(zinfo, mirror))
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
|
zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
|
||||||
if (zone->cond != BLK_ZONE_COND_FULL) {
|
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||||
if (zone->cond == BLK_ZONE_COND_EMPTY)
|
/* Advance the next zone */
|
||||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
if (zone->cond == BLK_ZONE_COND_FULL) {
|
||||||
|
zone++;
|
||||||
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
|
continue;
|
||||||
|
|
||||||
if (zone->wp == zone->start + zone->len)
|
|
||||||
zone->cond = BLK_ZONE_COND_FULL;
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
zone++;
|
|
||||||
ASSERT(zone->cond != BLK_ZONE_COND_FULL);
|
|
||||||
if (zone->cond == BLK_ZONE_COND_EMPTY)
|
if (zone->cond == BLK_ZONE_COND_EMPTY)
|
||||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||||
|
|
||||||
zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
|
zone->wp += SUPER_INFO_SECTORS;
|
||||||
|
|
||||||
if (zone->wp == zone->start + zone->len)
|
if (sb_zone_is_full(zone)) {
|
||||||
|
/*
|
||||||
|
* No room left to write new superblock. Since
|
||||||
|
* superblock is written with REQ_SYNC, it is safe to
|
||||||
|
* finish the zone now.
|
||||||
|
*
|
||||||
|
* If the write pointer is exactly at the capacity,
|
||||||
|
* explicit ZONE_FINISH is not necessary.
|
||||||
|
*/
|
||||||
|
if (zone->wp != zone->start + zone->capacity) {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = blkdev_zone_mgmt(device->bdev,
|
||||||
|
REQ_OP_ZONE_FINISH, zone->start,
|
||||||
|
zone->len, GFP_NOFS);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
zone->wp = zone->start + zone->len;
|
||||||
zone->cond = BLK_ZONE_COND_FULL;
|
zone->cond = BLK_ZONE_COND_FULL;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* All the zones are FULL. Should not reach here. */
|
||||||
|
ASSERT(0);
|
||||||
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
|
int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
|
||||||
@@ -895,6 +989,41 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
|
|||||||
return pos;
|
return pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool btrfs_dev_set_active_zone(struct btrfs_device *device, u64 pos)
|
||||||
|
{
|
||||||
|
struct btrfs_zoned_device_info *zone_info = device->zone_info;
|
||||||
|
unsigned int zno = (pos >> zone_info->zone_size_shift);
|
||||||
|
|
||||||
|
/* We can use any number of zones */
|
||||||
|
if (zone_info->max_active_zones == 0)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!test_bit(zno, zone_info->active_zones)) {
|
||||||
|
/* Active zone left? */
|
||||||
|
if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0)
|
||||||
|
return false;
|
||||||
|
if (test_and_set_bit(zno, zone_info->active_zones)) {
|
||||||
|
/* Someone already set the bit */
|
||||||
|
atomic_inc(&zone_info->active_zones_left);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void btrfs_dev_clear_active_zone(struct btrfs_device *device, u64 pos)
|
||||||
|
{
|
||||||
|
struct btrfs_zoned_device_info *zone_info = device->zone_info;
|
||||||
|
unsigned int zno = (pos >> zone_info->zone_size_shift);
|
||||||
|
|
||||||
|
/* We can use any number of zones */
|
||||||
|
if (zone_info->max_active_zones == 0)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (test_and_clear_bit(zno, zone_info->active_zones))
|
||||||
|
atomic_inc(&zone_info->active_zones_left);
|
||||||
|
}
|
||||||
|
|
||||||
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
|
int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
|
||||||
u64 length, u64 *bytes)
|
u64 length, u64 *bytes)
|
||||||
{
|
{
|
||||||
@@ -910,6 +1039,7 @@ int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
|
|||||||
*bytes = length;
|
*bytes = length;
|
||||||
while (length) {
|
while (length) {
|
||||||
btrfs_dev_set_zone_empty(device, physical);
|
btrfs_dev_set_zone_empty(device, physical);
|
||||||
|
btrfs_dev_clear_active_zone(device, physical);
|
||||||
physical += device->zone_info->zone_size;
|
physical += device->zone_info->zone_size;
|
||||||
length -= device->zone_info->zone_size;
|
length -= device->zone_info->zone_size;
|
||||||
}
|
}
|
||||||
@@ -1039,6 +1169,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
int i;
|
int i;
|
||||||
unsigned int nofs_flag;
|
unsigned int nofs_flag;
|
||||||
u64 *alloc_offsets = NULL;
|
u64 *alloc_offsets = NULL;
|
||||||
|
u64 *caps = NULL;
|
||||||
|
unsigned long *active = NULL;
|
||||||
u64 last_alloc = 0;
|
u64 last_alloc = 0;
|
||||||
u32 num_sequential = 0, num_conventional = 0;
|
u32 num_sequential = 0, num_conventional = 0;
|
||||||
|
|
||||||
@@ -1063,10 +1195,28 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
|
|
||||||
map = em->map_lookup;
|
map = em->map_lookup;
|
||||||
|
|
||||||
|
cache->physical_map = kmemdup(map, map_lookup_size(map->num_stripes), GFP_NOFS);
|
||||||
|
if (!cache->physical_map) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
|
alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
|
||||||
if (!alloc_offsets) {
|
if (!alloc_offsets) {
|
||||||
free_extent_map(em);
|
ret = -ENOMEM;
|
||||||
return -ENOMEM;
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
|
||||||
|
if (!caps) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
|
||||||
|
if (!active) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < map->num_stripes; i++) {
|
for (i = 0; i < map->num_stripes; i++) {
|
||||||
@@ -1131,6 +1281,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
caps[i] = (zone.capacity << SECTOR_SHIFT);
|
||||||
|
|
||||||
switch (zone.cond) {
|
switch (zone.cond) {
|
||||||
case BLK_ZONE_COND_OFFLINE:
|
case BLK_ZONE_COND_OFFLINE:
|
||||||
case BLK_ZONE_COND_READONLY:
|
case BLK_ZONE_COND_READONLY:
|
||||||
@@ -1144,14 +1296,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
alloc_offsets[i] = 0;
|
alloc_offsets[i] = 0;
|
||||||
break;
|
break;
|
||||||
case BLK_ZONE_COND_FULL:
|
case BLK_ZONE_COND_FULL:
|
||||||
alloc_offsets[i] = fs_info->zone_size;
|
alloc_offsets[i] = caps[i];
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* Partially used zone */
|
/* Partially used zone */
|
||||||
alloc_offsets[i] =
|
alloc_offsets[i] =
|
||||||
((zone.wp - zone.start) << SECTOR_SHIFT);
|
((zone.wp - zone.start) << SECTOR_SHIFT);
|
||||||
|
__set_bit(i, active);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Consider a zone as active if we can allow any number of
|
||||||
|
* active zones.
|
||||||
|
*/
|
||||||
|
if (!device->zone_info->max_active_zones)
|
||||||
|
__set_bit(i, active);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_sequential > 0)
|
if (num_sequential > 0)
|
||||||
@@ -1169,6 +1329,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
* calculate_alloc_pointer() which takes extent buffer
|
* calculate_alloc_pointer() which takes extent buffer
|
||||||
* locks to avoid deadlock.
|
* locks to avoid deadlock.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* Zone capacity is always zone size in emulation */
|
||||||
|
cache->zone_capacity = cache->length;
|
||||||
if (new) {
|
if (new) {
|
||||||
cache->alloc_offset = 0;
|
cache->alloc_offset = 0;
|
||||||
goto out;
|
goto out;
|
||||||
@@ -1195,6 +1358,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
cache->alloc_offset = alloc_offsets[0];
|
cache->alloc_offset = alloc_offsets[0];
|
||||||
|
cache->zone_capacity = caps[0];
|
||||||
|
cache->zone_is_active = test_bit(0, active);
|
||||||
break;
|
break;
|
||||||
case BTRFS_BLOCK_GROUP_DUP:
|
case BTRFS_BLOCK_GROUP_DUP:
|
||||||
case BTRFS_BLOCK_GROUP_RAID1:
|
case BTRFS_BLOCK_GROUP_RAID1:
|
||||||
@@ -1210,6 +1375,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cache->zone_is_active) {
|
||||||
|
btrfs_get_block_group(cache);
|
||||||
|
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||||
|
list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
|
||||||
|
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||||
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
if (cache->alloc_offset > fs_info->zone_size) {
|
if (cache->alloc_offset > fs_info->zone_size) {
|
||||||
btrfs_err(fs_info,
|
btrfs_err(fs_info,
|
||||||
@@ -1218,6 +1390,14 @@ out:
|
|||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cache->alloc_offset > cache->zone_capacity) {
|
||||||
|
btrfs_err(fs_info,
|
||||||
|
"zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu",
|
||||||
|
cache->alloc_offset, cache->zone_capacity,
|
||||||
|
cache->start);
|
||||||
|
ret = -EIO;
|
||||||
|
}
|
||||||
|
|
||||||
/* An extent is allocated after the write pointer */
|
/* An extent is allocated after the write pointer */
|
||||||
if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
|
if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
|
||||||
btrfs_err(fs_info,
|
btrfs_err(fs_info,
|
||||||
@@ -1229,6 +1409,12 @@ out:
|
|||||||
if (!ret)
|
if (!ret)
|
||||||
cache->meta_write_pointer = cache->alloc_offset + cache->start;
|
cache->meta_write_pointer = cache->alloc_offset + cache->start;
|
||||||
|
|
||||||
|
if (ret) {
|
||||||
|
kfree(cache->physical_map);
|
||||||
|
cache->physical_map = NULL;
|
||||||
|
}
|
||||||
|
bitmap_free(active);
|
||||||
|
kfree(caps);
|
||||||
kfree(alloc_offsets);
|
kfree(alloc_offsets);
|
||||||
free_extent_map(em);
|
free_extent_map(em);
|
||||||
|
|
||||||
@@ -1243,17 +1429,15 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
WARN_ON(cache->bytes_super != 0);
|
WARN_ON(cache->bytes_super != 0);
|
||||||
unusable = cache->alloc_offset - cache->used;
|
unusable = (cache->alloc_offset - cache->used) +
|
||||||
free = cache->length - cache->alloc_offset;
|
(cache->length - cache->zone_capacity);
|
||||||
|
free = cache->zone_capacity - cache->alloc_offset;
|
||||||
|
|
||||||
/* We only need ->free_space in ALLOC_SEQ block groups */
|
/* We only need ->free_space in ALLOC_SEQ block groups */
|
||||||
cache->last_byte_to_unpin = (u64)-1;
|
cache->last_byte_to_unpin = (u64)-1;
|
||||||
cache->cached = BTRFS_CACHE_FINISHED;
|
cache->cached = BTRFS_CACHE_FINISHED;
|
||||||
cache->free_space_ctl->free_space = free;
|
cache->free_space_ctl->free_space = free;
|
||||||
cache->zone_unusable = unusable;
|
cache->zone_unusable = unusable;
|
||||||
|
|
||||||
/* Should not have any excluded extents. Just in case, though */
|
|
||||||
btrfs_free_excluded_extents(cache);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
|
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
|
||||||
@@ -1304,6 +1488,17 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
|
|||||||
if (!is_data_inode(&inode->vfs_inode))
|
if (!is_data_inode(&inode->vfs_inode))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the
|
||||||
|
* extent layout the relocation code has.
|
||||||
|
* Furthermore we have set aside own block-group from which only the
|
||||||
|
* relocation "process" can allocate and make sure only one process at a
|
||||||
|
* time can add pages to an extent that gets relocated, so it's safe to
|
||||||
|
* use regular REQ_OP_WRITE for this special case.
|
||||||
|
*/
|
||||||
|
if (btrfs_is_data_reloc_root(inode->root))
|
||||||
|
return false;
|
||||||
|
|
||||||
cache = btrfs_lookup_block_group(fs_info, start);
|
cache = btrfs_lookup_block_group(fs_info, start);
|
||||||
ASSERT(cache);
|
ASSERT(cache);
|
||||||
if (!cache)
|
if (!cache)
|
||||||
@@ -1440,27 +1635,27 @@ int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 len
|
|||||||
static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
|
static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
|
||||||
struct blk_zone *zone)
|
struct blk_zone *zone)
|
||||||
{
|
{
|
||||||
struct btrfs_bio *bbio = NULL;
|
struct btrfs_io_context *bioc = NULL;
|
||||||
u64 mapped_length = PAGE_SIZE;
|
u64 mapped_length = PAGE_SIZE;
|
||||||
unsigned int nofs_flag;
|
unsigned int nofs_flag;
|
||||||
int nmirrors;
|
int nmirrors;
|
||||||
int i, ret;
|
int i, ret;
|
||||||
|
|
||||||
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
|
||||||
&mapped_length, &bbio);
|
&mapped_length, &bioc);
|
||||||
if (ret || !bbio || mapped_length < PAGE_SIZE) {
|
if (ret || !bioc || mapped_length < PAGE_SIZE) {
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bioc(bioc);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
nofs_flag = memalloc_nofs_save();
|
nofs_flag = memalloc_nofs_save();
|
||||||
nmirrors = (int)bbio->num_stripes;
|
nmirrors = (int)bioc->num_stripes;
|
||||||
for (i = 0; i < nmirrors; i++) {
|
for (i = 0; i < nmirrors; i++) {
|
||||||
u64 physical = bbio->stripes[i].physical;
|
u64 physical = bioc->stripes[i].physical;
|
||||||
struct btrfs_device *dev = bbio->stripes[i].dev;
|
struct btrfs_device *dev = bioc->stripes[i].dev;
|
||||||
|
|
||||||
/* Missing device */
|
/* Missing device */
|
||||||
if (!dev->bdev)
|
if (!dev->bdev)
|
||||||
@@ -1530,3 +1725,251 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
|
|||||||
|
|
||||||
return device;
|
return device;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Activate block group and underlying device zones
|
||||||
|
*
|
||||||
|
* @block_group: the block group to activate
|
||||||
|
*
|
||||||
|
* Return: true on success, false otherwise
|
||||||
|
*/
|
||||||
|
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||||
|
struct map_lookup *map;
|
||||||
|
struct btrfs_device *device;
|
||||||
|
u64 physical;
|
||||||
|
bool ret;
|
||||||
|
|
||||||
|
if (!btrfs_is_zoned(block_group->fs_info))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
map = block_group->physical_map;
|
||||||
|
/* Currently support SINGLE profile only */
|
||||||
|
ASSERT(map->num_stripes == 1);
|
||||||
|
device = map->stripes[0].dev;
|
||||||
|
physical = map->stripes[0].physical;
|
||||||
|
|
||||||
|
if (device->zone_info->max_active_zones == 0)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
spin_lock(&block_group->lock);
|
||||||
|
|
||||||
|
if (block_group->zone_is_active) {
|
||||||
|
ret = true;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* No space left */
|
||||||
|
if (block_group->alloc_offset == block_group->zone_capacity) {
|
||||||
|
ret = false;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!btrfs_dev_set_active_zone(device, physical)) {
|
||||||
|
/* Cannot activate the zone */
|
||||||
|
ret = false;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Successfully activated all the zones */
|
||||||
|
block_group->zone_is_active = 1;
|
||||||
|
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
|
|
||||||
|
/* For the active block group list */
|
||||||
|
btrfs_get_block_group(block_group);
|
||||||
|
|
||||||
|
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||||
|
ASSERT(list_empty(&block_group->active_bg_list));
|
||||||
|
list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
|
||||||
|
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int btrfs_zone_finish(struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||||
|
struct map_lookup *map;
|
||||||
|
struct btrfs_device *device;
|
||||||
|
u64 physical;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!btrfs_is_zoned(fs_info))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
map = block_group->physical_map;
|
||||||
|
/* Currently support SINGLE profile only */
|
||||||
|
ASSERT(map->num_stripes == 1);
|
||||||
|
|
||||||
|
device = map->stripes[0].dev;
|
||||||
|
physical = map->stripes[0].physical;
|
||||||
|
|
||||||
|
if (device->zone_info->max_active_zones == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
spin_lock(&block_group->lock);
|
||||||
|
if (!block_group->zone_is_active) {
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if we have unwritten allocated space */
|
||||||
|
if ((block_group->flags &
|
||||||
|
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) &&
|
||||||
|
block_group->alloc_offset > block_group->meta_write_pointer) {
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
|
return -EAGAIN;
|
||||||
|
}
|
||||||
|
spin_unlock(&block_group->lock);
|
||||||
|
|
||||||
|
+	ret = btrfs_inc_block_group_ro(block_group, false);
+	if (ret)
+		return ret;
+
+	/* Ensure all writes in this block group finish */
+	btrfs_wait_block_group_reservations(block_group);
+	/* No need to wait for NOCOW writers. Zoned mode does not allow that. */
+	btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
+				 block_group->length);
+
+	spin_lock(&block_group->lock);
+
+	/*
+	 * Bail out if someone already deactivated the block group, or
+	 * allocated space is left in the block group.
+	 */
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return 0;
+	}
+
+	if (block_group->reserved) {
+		spin_unlock(&block_group->lock);
+		btrfs_dec_block_group_ro(block_group);
+		return -EAGAIN;
+	}
+
+	block_group->zone_is_active = 0;
+	block_group->alloc_offset = block_group->zone_capacity;
+	block_group->free_space_ctl->free_space = 0;
+	btrfs_clear_treelog_bg(block_group);
+	spin_unlock(&block_group->lock);
+
+	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+			       physical >> SECTOR_SHIFT,
+			       device->zone_info->zone_size >> SECTOR_SHIFT,
+			       GFP_NOFS);
+	btrfs_dec_block_group_ro(block_group);
+
+	if (!ret) {
+		btrfs_dev_clear_active_zone(device, physical);
+
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		ASSERT(!list_empty(&block_group->active_bg_list));
+		list_del_init(&block_group->active_bg_list);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+
+		/* For active_bg_list */
+		btrfs_put_block_group(block_group);
+	}
+
+	return ret;
+}
+
+bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index)
+{
+	struct btrfs_device *device;
+	bool ret = false;
+
+	if (!btrfs_is_zoned(fs_devices->fs_info))
+		return true;
+
+	/* Non-single profiles are not supported yet */
+	if (raid_index != BTRFS_RAID_SINGLE)
+		return false;
+
+	/* Check if there is a device with active zones left */
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		struct btrfs_zoned_device_info *zinfo = device->zone_info;
+
+		if (!device->bdev)
+			continue;
+
+		if (!zinfo->max_active_zones ||
+		    atomic_read(&zinfo->active_zones_left)) {
+			ret = true;
+			break;
+		}
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+
+	return ret;
+}
+
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+{
+	struct btrfs_block_group *block_group;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+
+	if (!btrfs_is_zoned(fs_info))
+		return;
+
+	block_group = btrfs_lookup_block_group(fs_info, logical);
+	ASSERT(block_group);
+
+	if (logical + length < block_group->start + block_group->zone_capacity)
+		goto out;
+
+	spin_lock(&block_group->lock);
+
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		goto out;
+	}
+
+	block_group->zone_is_active = 0;
+	/* We should have consumed all the free space */
+	ASSERT(block_group->alloc_offset == block_group->zone_capacity);
+	ASSERT(block_group->free_space_ctl->free_space == 0);
+	btrfs_clear_treelog_bg(block_group);
+	spin_unlock(&block_group->lock);
+
+	map = block_group->physical_map;
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (!device->zone_info->max_active_zones)
+		goto out;
+
+	btrfs_dev_clear_active_zone(device, physical);
+
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(!list_empty(&block_group->active_bg_list));
+	list_del_init(&block_group->active_bg_list);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	btrfs_put_block_group(block_group);
+
+out:
+	btrfs_put_block_group(block_group);
+}
+
+void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg)
+{
+	struct btrfs_fs_info *fs_info = bg->fs_info;
+
+	spin_lock(&fs_info->relocation_bg_lock);
+	if (fs_info->data_reloc_bg == bg->start)
+		fs_info->data_reloc_bg = 0;
+	spin_unlock(&fs_info->relocation_bg_lock);
+}
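The added code above gates zone activation on a per-device budget: max_active_zones is what the device reports, and active_zones_left is an atomic counter that is consumed when a block group's zone is activated and returned when it is finished. The following standalone sketch illustrates that counter pattern in isolation; the struct and function names here are invented for the example and the simplified lock-free take/put is an assumption, not the kernel implementation.

/*
 * Minimal sketch of an "active zones left" budget, assuming a plain C11
 * userspace build (gcc -std=c11 budget.c).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct zone_budget {
	unsigned int max_active_zones;	/* 0 means the device reports no limit */
	atomic_uint active_zones_left;	/* remaining activation slots */
};

/* Try to reserve one activation slot; false when the budget is exhausted. */
static bool zone_budget_take(struct zone_budget *zb)
{
	unsigned int left;

	if (!zb->max_active_zones)
		return true;

	left = atomic_load(&zb->active_zones_left);
	while (left > 0) {
		if (atomic_compare_exchange_weak(&zb->active_zones_left,
						 &left, left - 1))
			return true;
	}
	return false;
}

/* Return a slot once the zone has been finished. */
static void zone_budget_put(struct zone_budget *zb)
{
	if (zb->max_active_zones)
		atomic_fetch_add(&zb->active_zones_left, 1);
}

int main(void)
{
	struct zone_budget zb = { .max_active_zones = 2 };

	atomic_init(&zb.active_zones_left, zb.max_active_zones);
	printf("take: %d\n", zone_budget_take(&zb));	/* 1 */
	printf("take: %d\n", zone_budget_take(&zb));	/* 1 */
	printf("take: %d\n", zone_budget_take(&zb));	/* 0: budget exhausted */
	zone_budget_put(&zb);				/* a zone was finished */
	printf("take: %d\n", zone_budget_take(&zb));	/* 1 again */
	return 0;
}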
@@ -23,8 +23,11 @@ struct btrfs_zoned_device_info {
 	u64 zone_size;
 	u8 zone_size_shift;
 	u32 nr_zones;
+	unsigned int max_active_zones;
+	atomic_t active_zones_left;
 	unsigned long *seq_zones;
 	unsigned long *empty_zones;
+	unsigned long *active_zones;
 	struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
 };

@@ -40,7 +43,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
 			       u64 *bytenr_ret);
 int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
 			  u64 *bytenr_ret);
-void btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
+int btrfs_advance_sb_log(struct btrfs_device *device, int mirror);
 int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror);
 u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 				 u64 hole_end, u64 num_bytes);

@@ -66,6 +69,13 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 				  u64 physical_start, u64 physical_pos);
 struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
 					    u64 logical, u64 length);
+bool btrfs_zone_activate(struct btrfs_block_group *block_group);
+int btrfs_zone_finish(struct btrfs_block_group *block_group);
+bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+			     int raid_index);
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+			     u64 length);
+void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)

@@ -113,8 +123,10 @@ static inline int btrfs_sb_log_location(struct btrfs_device *device, int mirror,
 	return 0;
 }

-static inline void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
-{ }
+static inline int btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
+{
+	return 0;
+}

 static inline int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
 {

@@ -199,6 +211,27 @@ static inline struct btrfs_device *btrfs_zoned_get_device(
 	return ERR_PTR(-EOPNOTSUPP);
 }

+static inline bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+{
+	return true;
+}
+
+static inline int btrfs_zone_finish(struct btrfs_block_group *block_group)
+{
+	return 0;
+}
+
+static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
+					   int raid_index)
+{
+	return true;
+}
+
+static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+					   u64 logical, u64 length) { }
+
+static inline void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) { }
+
 #endif

 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
@@ -1782,12 +1782,13 @@ EXPORT_SYMBOL(generic_update_time);
  * This does the actual work of updating an inodes time or version. Must have
  * had called mnt_want_write() before calling this.
  */
-static int update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
 {
 	if (inode->i_op->update_time)
 		return inode->i_op->update_time(inode, time, flags);
 	return generic_update_time(inode, time, flags);
 }
+EXPORT_SYMBOL(inode_update_time);

 /**
  * atime_needs_update - update the access time

@@ -1857,7 +1858,7 @@ void touch_atime(const struct path *path)
 	 * of the fs read only, e.g. subvolumes in Btrfs.
 	 */
 	now = current_time(inode);
-	update_time(inode, &now, S_ATIME);
+	inode_update_time(inode, &now, S_ATIME);
 	__mnt_drop_write(mnt);
 skip_update:
 	sb_end_write(inode->i_sb);

@@ -2002,7 +2003,7 @@ int file_update_time(struct file *file)
 	if (__mnt_want_write_file(file))
 		return 0;

-	ret = update_time(inode, &now, sync_it);
+	ret = inode_update_time(inode, &now, sync_it);
 	__mnt_drop_write_file(file);

 	return ret;
@@ -2496,6 +2496,8 @@ enum file_time_flags {

 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+
 static inline void file_accessed(struct file *file)
 {
 	if (!(file->f_flags & O_NOATIME))
@@ -771,10 +771,16 @@ struct btrfs_ioctl_received_subvol_args {
 */
 #define BTRFS_SEND_FLAG_OMIT_END_CMD		0x4

+/*
+ * Read the protocol version in the structure
+ */
+#define BTRFS_SEND_FLAG_VERSION			0x8
+
 #define BTRFS_SEND_FLAG_MASK \
 	(BTRFS_SEND_FLAG_NO_FILE_DATA | \
 	 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
-	 BTRFS_SEND_FLAG_OMIT_END_CMD)
+	 BTRFS_SEND_FLAG_OMIT_END_CMD | \
+	 BTRFS_SEND_FLAG_VERSION)

 struct btrfs_ioctl_send_args {
 	__s64 send_fd;			/* in */

@@ -782,7 +788,8 @@ struct btrfs_ioctl_send_args {
 	__u64 __user *clone_sources;	/* in */
 	__u64 parent_root;		/* in */
 	__u64 flags;			/* in */
-	__u64 reserved[4];		/* in */
+	__u32 version;			/* in */
+	__u8  reserved[28];		/* in */
 };

 /*
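The uapi hunks above only reserve a version field and a BTRFS_SEND_FLAG_VERSION flag; the v2 stream itself lands in later patches. The sketch below shows how a userspace send client could request a specific protocol version through the existing BTRFS_IOC_SEND ioctl once a kernel supports it. The function name and the choice of version 2 are assumptions for illustration, and the caller must expect the ioctl to fail on kernels that do not accept the requested version.

/*
 * Hypothetical userspace usage of the new version/flag fields, assuming a
 * uapi header that already contains this patch.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Ask the kernel to emit a send stream in the requested protocol version. */
int send_subvol_versioned(int subvol_fd, int out_fd, unsigned int proto)
{
	struct btrfs_ioctl_send_args args;

	memset(&args, 0, sizeof(args));
	args.send_fd = out_fd;			/* stream is written here */
	args.flags = BTRFS_SEND_FLAG_VERSION;	/* tell the kernel to read .version */
	args.version = proto;			/* e.g. 2, if supported */

	if (ioctl(subvol_fd, BTRFS_IOC_SEND, &args) < 0) {
		perror("BTRFS_IOC_SEND");
		return -1;
	}
	return 0;
}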