for-5.6-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl4vDYkACgkQxWXV+ddt
WDsNJQ//WJEcYoRpN5Y7oOIk/vo5ulF68P3kUh3hl206A13xpaHorvTvZKAD5s2o
C6xACJk839sGEhMdDRWvdeBDCHTedMk7EXjiZ6kJD+7EPpWmDllI5O6DTolT7SR2
b9zId4KCO+m8LiLZccRsxCJbdkJ7nJnz2c5+063TjsS3uq1BFudctRUjW/XnFCCZ
JIE5iOkdXrA+bFqc+l2zKTwgByQyJg+hVKRTZEJBT0QZsyNQvHKzXAmXxGopW8bO
SeuzFkiFTA0raK8xBz6mUwaZbk40Qlzm9v9AitFZx0x2nvQnMu447N3xyaiuyDWd
Li1aMN0uFZNgSz+AemuLfG0Wj70x1HrQisEj958XKzn4cPpUuMcc3lr1PZ2NIX+C
p6pSgaLOEq8Rc0U78/euZX6oyiLJPAmQO1TdkVMHrcMi36esBI6uG11rds+U+xeK
XoP20qXLFVYLLrl3wH9F4yIzydfMYu66Us1AeRPRB14NSSa7tbCOG//aCafOoLM6
518sJCazSWlv1kDewK8dtLiXc8eM6XJN+KI4NygFZrUj2Rq376q5oovUUKKkn3iN
pdHtF/7gAxIx6bZ+jY/gyt/Xe5AdPi7sKggahvrSOL3X+LLINwC4r+vAnnpd6yh4
NfJj5fobvc/mO9PEVMwgJ8PmHw5uNqeMlORGjk7stQs7Oez3tCw=
=4OkE
-----END PGP SIGNATURE-----

Merge tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Features, highlights:

   - async discard
       - "mount -o discard=async" to enable it
       - freed extents are not discarded immediately, but grouped
         together and trimmed later, with IO rate limiting
       - the "sync" mode submits short extents that could have been
         ignored completely by the device; for SATA prior to 3.1 the
         requests are unqueued and have a big impact on performance
       - the actual discard IO requests have been moved out of
         transaction commit to a worker thread, improving commit
         latency
       - IO rate and request size can be tuned by sysfs files, for now
         enabled only with CONFIG_BTRFS_DEBUG as we might need to
         add/delete the files and don't have a stable-ish ABI for
         general use; defaults are conservative

   - export device state info in sysfs, eg. missing, writeable

   - no discard of extents known to be untouched on disk (eg. after
     reservation)

   - device stats reset is logged with the process name and PID that
     called the ioctl

  Fixes:

   - fix missing hole after hole punching and fsync when using NO_HOLES

   - writeback: range cyclic mode could miss some dirty pages and lead
     to OOM

   - two more corner cases for metadata_uuid change after power loss
     during the change

   - fix infinite loop during fsync after a mix of rename operations

  Core changes:

   - qgroup assign returns ENOTCONN when quotas are not enabled; it
     used to return EINVAL, which was confusing

   - device closing does not need to allocate memory anymore

   - snapshot aware code got removed, disabled for years due to
     performance problems; a reimplementation will allow selecting
     whether defrag breaks or does not break COW on shared extents

   - tree-checker:
       - check leaf chunk item size, cross check against number of
         stripes
       - verify location keys for DIR_ITEM, DIR_INDEX and XATTR items

   - new self test for the physical -> logical mapping code, used for
     super block range exclusion

   - assertion helpers/macros updated to avoid objtool "unreachable
     code" reports on older compilers or config option combinations"

* tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (84 commits)
  btrfs: free block groups after free'ing fs trees
  btrfs: Fix split-brain handling when changing FSID to metadata uuid
  btrfs: Handle another split brain scenario with metadata uuid feature
  btrfs: Factor out metadata_uuid code from find_fsid.
  btrfs: Call find_fsid from find_fsid_inprogress
  Btrfs: fix infinite loop during fsync after rename operations
  btrfs: set trans->drity in btrfs_commit_transaction
  btrfs: drop log root for dropped roots
  btrfs: sysfs, add devid/dev_state kobject and device attributes
  btrfs: Refactor btrfs_rmap_block to improve readability
  btrfs: Add self-tests for btrfs_rmap_block
  btrfs: selftests: Add support for dummy devices
  btrfs: Move and unexport btrfs_rmap_block
  btrfs: separate definition of assertion failure handlers
  btrfs: device stats, log when stats are zeroed
  btrfs: fix improper setting of scanned for range cyclic write cache pages
  btrfs: safely advance counter when looking up bio csums
  btrfs: remove unused member btrfs_device::work
  btrfs: remove unnecessary wrapper get_alloc_profile
  btrfs: add correction to handle -1 edge case in async discard
  ...
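[Editor's note] The IO rate limiting mentioned above can be sketched in isolation. The toy program below models how the next discard delay could be derived from the size of the previous discard and a kbps limit, clamped to a 1-1000 ms window; the constant values mirror the ones introduced in fs/btrfs/discard.c further down, but the function and its driver are invented for illustration and are not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical userspace model of the async discard delay arithmetic. */
#define MIN_DELAY_MSEC 1UL
#define MAX_DELAY_MSEC 1000UL

static unsigned long next_delay_msec(uint64_t prev_discard_bytes,
                                     uint32_t kbps_limit,
                                     unsigned long base_delay_msec)
{
        unsigned long delay = base_delay_msec;

        /* Byte rate limit: time needed to "pay for" the previous discard */
        if (kbps_limit && prev_discard_bytes) {
                uint64_t bps_limit = (uint64_t)kbps_limit * 1024;
                uint64_t bps_delay = prev_discard_bytes * 1000 / bps_limit;

                if (bps_delay > delay)
                        delay = (unsigned long)bps_delay;
        }

        /* Clamp into the allowed window */
        if (delay < MIN_DELAY_MSEC)
                delay = MIN_DELAY_MSEC;
        if (delay > MAX_DELAY_MSEC)
                delay = MAX_DELAY_MSEC;
        return delay;
}

int main(void)
{
        /* 8 MiB discarded last round at a 100 MiB/s limit -> 80 ms delay */
        printf("%lu ms\n", next_delay_msec(8ULL << 20, 100 * 1024, 10));
        return 0;
}

Because a single delayed work item issues all discards, remembering only the previous discard size is enough to keep the long-run rate under the limit; that is the same design choice visible in btrfs_discard_schedule_work() below.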
This commit is contained in: commit 81a046b18b
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
@@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
-	   block-rsv.o delalloc-space.o block-group.o
+	   block-rsv.o delalloc-space.o block-group.o discard.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
@@ -14,6 +14,8 @@
 #include "sysfs.h"
 #include "tree-log.h"
 #include "delalloc-space.h"
+#include "discard.h"
+#include "raid56.h"
 
 /*
  * Return target flags in extended format or 0 if restripe for this chunk_type
@@ -95,7 +97,7 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
 	return extended_to_chunk(flags | allowed);
 }
 
-static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
+u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
 {
 	unsigned seq;
 	u64 flags;
@@ -115,11 +117,6 @@ static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
 	return btrfs_reduce_alloc_profile(fs_info, flags);
 }
 
-u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
-{
-	return get_alloc_profile(fs_info, orig_flags);
-}
-
 void btrfs_get_block_group(struct btrfs_block_group *cache)
 {
 	atomic_inc(&cache->count);
@@ -131,6 +128,15 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
 		WARN_ON(cache->pinned > 0);
 		WARN_ON(cache->reserved > 0);
 
+		/*
+		 * A block_group shouldn't be on the discard_list anymore.
+		 * Remove the block_group from the discard_list to prevent us
+		 * from causing a panic due to NULL pointer dereference.
+		 */
+		if (WARN_ON(!list_empty(&cache->discard_list)))
+			btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
+						  cache);
+
 		/*
 		 * If not empty, someone is still holding mutex of
 		 * full_stripe_lock, which can only be released by caller.
@@ -466,8 +472,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
 		} else if (extent_start > start && extent_start < end) {
 			size = extent_start - start;
 			total_added += size;
-			ret = btrfs_add_free_space(block_group, start,
-						   size);
+			ret = btrfs_add_free_space_async_trimmed(block_group,
+								 start, size);
 			BUG_ON(ret); /* -ENOMEM or logic error */
 			start = extent_end + 1;
 		} else {
@@ -478,7 +484,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
 	if (start < end) {
 		size = end - start;
 		total_added += size;
-		ret = btrfs_add_free_space(block_group, start, size);
+		ret = btrfs_add_free_space_async_trimmed(block_group, start,
+							 size);
 		BUG_ON(ret); /* -ENOMEM or logic error */
 	}
 
@@ -1185,21 +1192,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 	struct btrfs_space_info *sinfo = cache->space_info;
 	u64 num_bytes;
 	u64 sinfo_used;
-	u64 min_allocable_bytes;
 	int ret = -ENOSPC;
 
-	/*
-	 * We need some metadata space and system metadata space for
-	 * allocating chunks in some corner cases until we force to set
-	 * it to be readonly.
-	 */
-	if ((sinfo->flags &
-	     (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
-	    !force)
-		min_allocable_bytes = SZ_1M;
-	else
-		min_allocable_bytes = 0;
-
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
 
@@ -1217,10 +1211,9 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 	 * sinfo_used + num_bytes should always <= sinfo->total_bytes.
 	 *
 	 * Here we make sure if we mark this bg RO, we still have enough
-	 * free space as buffer (if min_allocable_bytes is not 0).
+	 * free space as buffer.
 	 */
-	if (sinfo_used + num_bytes + min_allocable_bytes <=
-	    sinfo->total_bytes) {
+	if (sinfo_used + num_bytes <= sinfo->total_bytes) {
 		sinfo->bytes_readonly += num_bytes;
 		cache->ro++;
 		list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
@@ -1233,8 +1226,8 @@ out:
 		btrfs_info(cache->fs_info,
 			"unable to make block group %llu ro", cache->start);
 		btrfs_info(cache->fs_info,
-			"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
-			sinfo_used, num_bytes, min_allocable_bytes);
+			"sinfo_used=%llu bg_num_bytes=%llu",
+			sinfo_used, num_bytes);
 		btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
 	}
 	return ret;
@@ -1249,6 +1242,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 	struct btrfs_block_group *block_group;
 	struct btrfs_space_info *space_info;
 	struct btrfs_trans_handle *trans;
+	const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
 	int ret = 0;
 
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
@@ -1272,10 +1266,28 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		}
 		spin_unlock(&fs_info->unused_bgs_lock);
 
+		btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
+
 		mutex_lock(&fs_info->delete_unused_bgs_mutex);
 
 		/* Don't want to race with allocators so take the groups_sem */
 		down_write(&space_info->groups_sem);
+
+		/*
+		 * Async discard moves the final block group discard to be prior
+		 * to the unused_bgs code path. Therefore, if it's not fully
+		 * trimmed, punt it back to the async discard lists.
+		 */
+		if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
+		    !btrfs_is_free_space_trimmed(block_group)) {
+			trace_btrfs_skip_unused_block_group(block_group);
+			up_write(&space_info->groups_sem);
+			/* Requeue if we failed because of async discard */
+			btrfs_discard_queue_work(&fs_info->discard_ctl,
+						 block_group);
+			goto next;
+		}
+
 		spin_lock(&block_group->lock);
 		if (block_group->reserved || block_group->pinned ||
 		    block_group->used || block_group->ro ||
@@ -1347,6 +1359,23 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		}
 		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 
+		/*
+		 * At this point, the block_group is read only and should fail
+		 * new allocations. However, btrfs_finish_extent_commit() can
+		 * cause this block_group to be placed back on the discard
+		 * lists because now the block_group isn't fully discarded.
+		 * Bail here and try again later after discarding everything.
+		 */
+		spin_lock(&fs_info->discard_ctl.lock);
+		if (!list_empty(&block_group->discard_list)) {
+			spin_unlock(&fs_info->discard_ctl.lock);
+			btrfs_dec_block_group_ro(block_group);
+			btrfs_discard_queue_work(&fs_info->discard_ctl,
+						 block_group);
+			goto end_trans;
+		}
+		spin_unlock(&fs_info->discard_ctl.lock);
+
 		/* Reset pinned so btrfs_put_block_group doesn't complain */
 		spin_lock(&space_info->lock);
 		spin_lock(&block_group->lock);
@@ -1362,8 +1391,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		spin_unlock(&block_group->lock);
 		spin_unlock(&space_info->lock);
 
+		/*
+		 * The normal path here is an unused block group is passed here,
+		 * then trimming is handled in the transaction commit path.
+		 * Async discard interposes before this to do the trimming
+		 * before coming down the unused block group path as trimming
+		 * will no longer be done later in the transaction commit path.
+		 */
+		if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
+			goto flip_async;
+
 		/* DISCARD can flip during remount */
-		trimming = btrfs_test_opt(fs_info, DISCARD);
+		trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);
 
 		/* Implicit trim during transaction commit. */
 		if (trimming)
@@ -1406,6 +1445,13 @@ next:
 		spin_lock(&fs_info->unused_bgs_lock);
 	}
 	spin_unlock(&fs_info->unused_bgs_lock);
+	return;
+
+flip_async:
+	btrfs_end_transaction(trans);
+	mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+	btrfs_put_block_group(block_group);
+	btrfs_discard_punt_unused_bgs_list(fs_info);
 }
 
 void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
@@ -1516,6 +1562,102 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 	write_sequnlock(&fs_info->profiles_lock);
 }
 
+/**
+ * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
+ * @chunk_start: logical address of block group
+ * @physical:    physical address to map to logical addresses
+ * @logical:     return array of logical addresses which map to @physical
+ * @naddrs:      length of @logical
+ * @stripe_len:  size of IO stripe for the given block group
+ *
+ * Maps a particular @physical disk address to a list of @logical addresses.
+ * Used primarily to exclude those portions of a block group that contain super
+ * block copies.
+ */
+EXPORT_FOR_TESTS
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+		     u64 physical, u64 **logical, int *naddrs, int *stripe_len)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	u64 *buf;
+	u64 bytenr;
+	u64 data_stripe_length;
+	u64 io_stripe_size;
+	int i, nr = 0;
+	int ret = 0;
+
+	em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
+	if (IS_ERR(em))
+		return -EIO;
+
+	map = em->map_lookup;
+	data_stripe_length = em->len;
+	io_stripe_size = map->stripe_len;
+
+	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+		data_stripe_length = div_u64(data_stripe_length,
+					     map->num_stripes / map->sub_stripes);
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+		data_stripe_length = div_u64(data_stripe_length, map->num_stripes);
+	else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+		data_stripe_length = div_u64(data_stripe_length,
+					     nr_data_stripes(map));
+		io_stripe_size = map->stripe_len * nr_data_stripes(map);
+	}
+
+	buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
+	if (!buf) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < map->num_stripes; i++) {
+		bool already_inserted = false;
+		u64 stripe_nr;
+		int j;
+
+		if (!in_range(physical, map->stripes[i].physical,
+			      data_stripe_length))
+			continue;
+
+		stripe_nr = physical - map->stripes[i].physical;
+		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
+
+		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+			stripe_nr = stripe_nr * map->num_stripes + i;
+			stripe_nr = div_u64(stripe_nr, map->sub_stripes);
+		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+			stripe_nr = stripe_nr * map->num_stripes + i;
+		}
+		/*
+		 * The remaining case would be for RAID56, multiply by
+		 * nr_data_stripes(). Alternatively, just use rmap_len below
+		 * instead of map->stripe_len
+		 */
+
+		bytenr = chunk_start + stripe_nr * io_stripe_size;
+
+		/* Ensure we don't add duplicate addresses */
+		for (j = 0; j < nr; j++) {
+			if (buf[j] == bytenr) {
+				already_inserted = true;
+				break;
+			}
+		}
+
+		if (!already_inserted)
+			buf[nr++] = bytenr;
+	}
+
+	*logical = buf;
+	*naddrs = nr;
+	*stripe_len = io_stripe_size;
+out:
+	free_extent_map(em);
+	return ret;
+}
+
 static int exclude_super_stripes(struct btrfs_block_group *cache)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -1610,6 +1752,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
 	set_free_space_tree_thresholds(cache);
 
+	cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
+
 	atomic_set(&cache->count, 1);
 	spin_lock_init(&cache->lock);
 	init_rwsem(&cache->data_rwsem);
@@ -1617,6 +1761,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
 	INIT_LIST_HEAD(&cache->cluster_list);
 	INIT_LIST_HEAD(&cache->bg_list);
 	INIT_LIST_HEAD(&cache->ro_list);
+	INIT_LIST_HEAD(&cache->discard_list);
 	INIT_LIST_HEAD(&cache->dirty_list);
 	INIT_LIST_HEAD(&cache->io_list);
 	btrfs_init_free_space_ctl(cache);
@@ -1775,7 +1920,10 @@ static int read_one_block_group(struct btrfs_fs_info *info,
 		inc_block_group_ro(cache, 1);
 	} else if (cache->used == 0) {
 		ASSERT(list_empty(&cache->bg_list));
-		btrfs_mark_bg_unused(cache);
+		if (btrfs_test_opt(info, DISCARD_ASYNC))
+			btrfs_discard_queue_work(&info->discard_ctl, cache);
+		else
+			btrfs_mark_bg_unused(cache);
 	}
 	return 0;
 error:
@@ -2738,8 +2886,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 		 * dirty list to avoid races between cleaner kthread and space
 		 * cache writeout.
 		 */
-		if (!alloc && old_val == 0)
-			btrfs_mark_bg_unused(cache);
+		if (!alloc && old_val == 0) {
+			if (!btrfs_test_opt(info, DISCARD_ASYNC))
+				btrfs_mark_bg_unused(cache);
+		}
 
 		btrfs_put_block_group(cache);
 		total -= num_bytes;
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
@@ -12,6 +12,19 @@ enum btrfs_disk_cache_state {
 	BTRFS_DC_SETUP,
 };
 
+/*
+ * This describes the state of the block_group for async discard. This is due
+ * to the two pass nature of it where extent discarding is prioritized over
+ * bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
+ * between lists to prevent contention for discard state variables
+ * (eg. discard_cursor).
+ */
+enum btrfs_discard_state {
+	BTRFS_DISCARD_EXTENTS,
+	BTRFS_DISCARD_BITMAPS,
+	BTRFS_DISCARD_RESET_CURSOR,
+};
+
 /*
  * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
  * only allocate a chunk if we really need one.
@@ -116,7 +129,13 @@ struct btrfs_block_group {
 	/* For read-only block groups */
 	struct list_head ro_list;
 
+	/* For discard operations */
 	atomic_t trimming;
+	struct list_head discard_list;
+	int discard_index;
+	u64 discard_eligible_time;
+	u64 discard_cursor;
+	enum btrfs_discard_state discard_state;
 
 	/* For dirty block groups */
 	struct list_head dirty_list;
@@ -158,6 +177,22 @@ struct btrfs_block_group {
 	struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
 };
 
+static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
+{
+	return (block_group->start + block_group->length);
+}
+
+static inline bool btrfs_is_block_group_data_only(
+					struct btrfs_block_group *block_group)
+{
+	/*
+	 * In mixed mode the fragmentation is expected to be high, lowering the
+	 * efficiency, so only proper data block groups are considered.
+	 */
+	return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+	       !(block_group->flags & BTRFS_BLOCK_GROUP_METADATA);
+}
+
 #ifdef CONFIG_BTRFS_DEBUG
 static inline int btrfs_should_fragment_free_space(
 		struct btrfs_block_group *block_group)
@@ -248,4 +283,9 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
 		cache->cached == BTRFS_CACHE_ERROR;
 }
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
+		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
+#endif
+
 #endif /* BTRFS_BLOCK_GROUP_H */
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
@@ -629,7 +629,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
 static int btrfsic_process_superblock(struct btrfsic_state *state,
 				      struct btrfs_fs_devices *fs_devices)
 {
-	struct btrfs_fs_info *fs_info = state->fs_info;
 	struct btrfs_super_block *selected_super;
 	struct list_head *dev_head = &fs_devices->devices;
 	struct btrfs_device *device;
@@ -637,7 +636,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 	int ret = 0;
 	int pass;
 
-	BUG_ON(NULL == state);
 	selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
 	if (NULL == selected_super) {
 		pr_info("btrfsic: error, kmalloc failed!\n");
@@ -700,7 +698,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
 				break;
 		}
 
-		num_copies = btrfs_num_copies(fs_info, next_bytenr,
+		num_copies = btrfs_num_copies(state->fs_info, next_bytenr,
 					      state->metablock_size);
 		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
 			pr_info("num_copies(log_bytenr=%llu) = %d\n",
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
@@ -763,7 +763,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
 			if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 				ret = btrfs_lookup_bio_sums(inode, comp_bio,
-							    sums);
+							    (u64)-1, sums);
 				BUG_ON(ret); /* -ENOMEM */
 			}
 
@@ -791,7 +791,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 		BUG_ON(ret); /* -ENOMEM */
 
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-		ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
+		ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
@@ -101,6 +101,14 @@ struct btrfs_ref;
 
 #define BTRFS_MAX_EXTENT_SIZE SZ_128M
 
+/*
+ * Deltas are an effective way to populate global statistics. Give macro names
+ * to make it clear what we're doing. An example is discard_extents in
+ * btrfs_free_space_ctl.
+ */
+#define BTRFS_STAT_NR_ENTRIES	2
+#define BTRFS_STAT_CURR		0
+#define BTRFS_STAT_PREV		1
+
 /*
  * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
@@ -440,6 +448,36 @@ struct btrfs_full_stripe_locks_tree {
 	struct mutex lock;
 };
 
+/* Discard control. */
+/*
+ * Async discard uses multiple lists to differentiate the discard filter
+ * parameters. Index 0 is for completely free block groups where we need to
+ * ensure the entire block group is trimmed without being lossy. Indices
+ * afterwards represent monotonically decreasing discard filter sizes to
+ * prioritize what should be discarded next.
+ */
+#define BTRFS_NR_DISCARD_LISTS		3
+#define BTRFS_DISCARD_INDEX_UNUSED	0
+#define BTRFS_DISCARD_INDEX_START	1
+
+struct btrfs_discard_ctl {
+	struct workqueue_struct *discard_workers;
+	struct delayed_work work;
+	spinlock_t lock;
+	struct btrfs_block_group *block_group;
+	struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
+	u64 prev_discard;
+	atomic_t discardable_extents;
+	atomic64_t discardable_bytes;
+	u64 max_discard_size;
+	unsigned long delay;
+	u32 iops_limit;
+	u32 kbps_limit;
+	u64 discard_extent_bytes;
+	u64 discard_bitmap_bytes;
+	atomic64_t discard_bytes_saved;
+};
+
 /* delayed seq elem */
 struct seq_list {
 	struct list_head list;
@@ -526,6 +564,9 @@ enum {
 	 * so we don't need to offload checksums to workqueues.
 	 */
 	BTRFS_FS_CSUM_IMPL_FAST,
+
+	/* Indicate that the discard workqueue can service discards. */
+	BTRFS_FS_DISCARD_RUNNING,
 };
 
 struct btrfs_fs_info {
@@ -816,6 +857,8 @@ struct btrfs_fs_info {
 	struct btrfs_workqueue *scrub_wr_completion_workers;
 	struct btrfs_workqueue *scrub_parity_workers;
 
+	struct btrfs_discard_ctl discard_ctl;
+
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
 	u32 check_integrity_print_mask;
 #endif
@@ -902,6 +945,11 @@ struct btrfs_fs_info {
 	spinlock_t ref_verify_lock;
 	struct rb_root block_tree;
 #endif
+
+#ifdef CONFIG_BTRFS_DEBUG
+	struct kobject *debug_kobj;
+	struct kobject *discard_debug_kobj;
+#endif
 };
 
 static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
@@ -1170,7 +1218,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
 #define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
 #define BTRFS_MOUNT_NOSSD		(1 << 9)
-#define BTRFS_MOUNT_DISCARD		(1 << 10)
+#define BTRFS_MOUNT_DISCARD_SYNC	(1 << 10)
 #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
 #define BTRFS_MOUNT_SPACE_CACHE		(1 << 12)
 #define BTRFS_MOUNT_CLEAR_CACHE	        (1 << 13)
@@ -1189,6 +1237,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
 #define BTRFS_MOUNT_FREE_SPACE_TREE	(1 << 26)
 #define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
 #define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
+#define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
@@ -2449,8 +2498,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
 
 int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
 			       u64 start, u64 len, int delalloc);
-int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
-				       u64 start, u64 len);
+int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start,
+			      u64 len);
 void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
@@ -2789,9 +2838,7 @@ struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, u64 bytenr, u64 len);
 blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
-				   u8 *dst);
-blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
-				       u64 logical_offset);
+				   u64 offset, u8 *dst);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root,
 			     u64 objectid, u64 pos,
@@ -2877,7 +2924,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			 struct btrfs_root *root);
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
 				    struct page *page, size_t pg_offset,
-				    u64 start, u64 end, int create);
+				    u64 start, u64 end);
 int btrfs_update_inode(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root,
 		       struct inode *inode);
@@ -3110,17 +3157,21 @@ do {								\
 	rcu_read_unlock();					\
 } while (0)
 
-__cold
-static inline void assfail(const char *expr, const char *file, int line)
+#ifdef CONFIG_BTRFS_ASSERT
+__cold __noreturn
+static inline void assertfail(const char *expr, const char *file, int line)
 {
-	if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
-		pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
-		BUG();
-	}
+	pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
+	BUG();
 }
 
 #define ASSERT(expr)	\
-	(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+	(likely(expr) ? (void)0 : assertfail(#expr, __FILE__, __LINE__))
+
+#else
+static inline void assertfail(const char *expr, const char* file, int line) { }
+#define ASSERT(expr)	(void)(expr)
+#endif
 
 /*
  * Use that for functions that are conditionally exported for sanity tests but
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
@@ -704,6 +704,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
 	/* replace the sysfs entry */
 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
+	btrfs_sysfs_update_devid(tgt_device);
 	btrfs_rm_dev_replace_free_srcdev(src_device);
 
 	/* write back the superblocks */
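[Editor's note] Before the new file itself, a hedged sketch of its central idea: each block group walks a two-pass state machine (extents first, then bitmaps) driven by repeated delayed work items, and only a fully discarded group moves on to the unused_bgs path. The state names below are borrowed from the patch; the struct and driver loop are invented for this illustration and are not kernel code.

#include <stdio.h>

enum discard_state { DISCARD_EXTENTS, DISCARD_BITMAPS, DISCARD_DONE };

struct bg_model {
        unsigned long long cursor;      /* stands in for discard_cursor */
        unsigned long long end;         /* stands in for btrfs_block_group_end() */
        enum discard_state state;
};

/* One work item pass: trim one region, then advance the state machine */
static void discard_one_region(struct bg_model *bg, unsigned long long step)
{
        bg->cursor += step;             /* stand-in for the actual trim */
        if (bg->cursor < bg->end)
                return;                 /* more regions left in this pass */

        if (bg->state == DISCARD_EXTENTS) {
                /* extents finished: restart the cursor for the bitmap pass */
                bg->cursor = 0;
                bg->state = DISCARD_BITMAPS;
        } else {
                bg->state = DISCARD_DONE;   /* -> unused_bgs path */
        }
}

int main(void)
{
        struct bg_model bg = { 0, 4, DISCARD_EXTENTS };
        int passes = 0;

        while (bg.state != DISCARD_DONE) {
                discard_one_region(&bg, 2);
                passes++;
        }
        printf("finished after %d work items\n", passes);  /* prints 4 */
        return 0;
}

Splitting the work into one small region per work item is what lets the scheduler in discard.c re-evaluate the delay between every trim, which is how the iops and kbps limits stay enforceable.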
702
fs/btrfs/discard.c
Normal file
702
fs/btrfs/discard.c
Normal file
@ -0,0 +1,702 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <linux/jiffies.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/ktime.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
#include <linux/math64.h>
|
||||||
|
#include <linux/sizes.h>
|
||||||
|
#include <linux/workqueue.h>
|
||||||
|
#include "ctree.h"
|
||||||
|
#include "block-group.h"
|
||||||
|
#include "discard.h"
|
||||||
|
#include "free-space-cache.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This contains the logic to handle async discard.
|
||||||
|
*
|
||||||
|
* Async discard manages trimming of free space outside of transaction commit.
|
||||||
|
* Discarding is done by managing the block_groups on a LRU list based on free
|
||||||
|
* space recency. Two passes are used to first prioritize discarding extents
|
||||||
|
* and then allow for trimming in the bitmap the best opportunity to coalesce.
|
||||||
|
* The block_groups are maintained on multiple lists to allow for multiple
|
||||||
|
* passes with different discard filter requirements. A delayed work item is
|
||||||
|
* used to manage discarding with timeout determined by a max of the delay
|
||||||
|
* incurred by the iops rate limit, the byte rate limit, and the max delay of
|
||||||
|
* BTRFS_DISCARD_MAX_DELAY.
|
||||||
|
*
|
||||||
|
* Note, this only keeps track of block_groups that are explicitly for data.
|
||||||
|
* Mixed block_groups are not supported.
|
||||||
|
*
|
||||||
|
* The first list is special to manage discarding of fully free block groups.
|
||||||
|
* This is necessary because we issue a final trim for a full free block group
|
||||||
|
* after forgetting it. When a block group becomes unused, instead of directly
|
||||||
|
* being added to the unused_bgs list, we add it to this first list. Then
|
||||||
|
* from there, if it becomes fully discarded, we place it onto the unused_bgs
|
||||||
|
* list.
|
||||||
|
*
|
||||||
|
* The in-memory free space cache serves as the backing state for discard.
|
||||||
|
* Consequently this means there is no persistence. We opt to load all the
|
||||||
|
* block groups in as not discarded, so the mount case degenerates to the
|
||||||
|
* crashing case.
|
||||||
|
*
|
||||||
|
* As the free space cache uses bitmaps, there exists a tradeoff between
|
||||||
|
* ease/efficiency for find_free_extent() and the accuracy of discard state.
|
||||||
|
* Here we opt to let untrimmed regions merge with everything while only letting
|
||||||
|
* trimmed regions merge with other trimmed regions. This can cause
|
||||||
|
* overtrimming, but the coalescing benefit seems to be worth it. Additionally,
|
||||||
|
* bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
|
||||||
|
* the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
|
||||||
|
* this resets the state and we will retry trimming the whole bitmap. This is a
|
||||||
|
* tradeoff between discard state accuracy and the cost of accounting.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* This is an initial delay to give some chance for block reuse */
|
||||||
|
#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)
|
||||||
|
#define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)
|
||||||
|
|
||||||
|
/* Target completion latency of discarding all discardable extents */
|
||||||
|
#define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC)
|
||||||
|
#define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL)
|
||||||
|
#define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL)
|
||||||
|
#define BTRFS_DISCARD_MAX_IOPS (10U)
|
||||||
|
|
||||||
|
/* Montonically decreasing minimum length filters after index 0 */
|
||||||
|
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
|
||||||
|
0,
|
||||||
|
BTRFS_ASYNC_DISCARD_MAX_FILTER,
|
||||||
|
BTRFS_ASYNC_DISCARD_MIN_FILTER
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
return &discard_ctl->discard_list[block_group->discard_index];
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
if (!btrfs_run_discard_work(discard_ctl))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (list_empty(&block_group->discard_list) ||
|
||||||
|
block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
|
||||||
|
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
|
||||||
|
block_group->discard_index = BTRFS_DISCARD_INDEX_START;
|
||||||
|
block_group->discard_eligible_time = (ktime_get_ns() +
|
||||||
|
BTRFS_DISCARD_DELAY);
|
||||||
|
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_move_tail(&block_group->discard_list,
|
||||||
|
get_discard_list(discard_ctl, block_group));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
if (!btrfs_is_block_group_data_only(block_group))
|
||||||
|
return;
|
||||||
|
|
||||||
|
spin_lock(&discard_ctl->lock);
|
||||||
|
__add_to_discard_list(discard_ctl, block_group);
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
spin_lock(&discard_ctl->lock);
|
||||||
|
|
||||||
|
if (!btrfs_run_discard_work(discard_ctl)) {
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
list_del_init(&block_group->discard_list);
|
||||||
|
|
||||||
|
block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
|
||||||
|
block_group->discard_eligible_time = (ktime_get_ns() +
|
||||||
|
BTRFS_DISCARD_UNUSED_DELAY);
|
||||||
|
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
|
||||||
|
list_add_tail(&block_group->discard_list,
|
||||||
|
&discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
|
||||||
|
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
bool running = false;
|
||||||
|
|
||||||
|
spin_lock(&discard_ctl->lock);
|
||||||
|
|
||||||
|
if (block_group == discard_ctl->block_group) {
|
||||||
|
running = true;
|
||||||
|
discard_ctl->block_group = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
block_group->discard_eligible_time = 0;
|
||||||
|
list_del_init(&block_group->discard_list);
|
||||||
|
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
|
||||||
|
return running;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* find_next_block_group - find block_group that's up next for discarding
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @now: current time
|
||||||
|
*
|
||||||
|
* Iterate over the discard lists to find the next block_group up for
|
||||||
|
* discarding checking the discard_eligible_time of block_group.
|
||||||
|
*/
|
||||||
|
static struct btrfs_block_group *find_next_block_group(
|
||||||
|
struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
u64 now)
|
||||||
|
{
|
||||||
|
struct btrfs_block_group *ret_block_group = NULL, *block_group;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
|
||||||
|
struct list_head *discard_list = &discard_ctl->discard_list[i];
|
||||||
|
|
||||||
|
if (!list_empty(discard_list)) {
|
||||||
|
block_group = list_first_entry(discard_list,
|
||||||
|
struct btrfs_block_group,
|
||||||
|
discard_list);
|
||||||
|
|
||||||
|
if (!ret_block_group)
|
||||||
|
ret_block_group = block_group;
|
||||||
|
|
||||||
|
if (ret_block_group->discard_eligible_time < now)
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (ret_block_group->discard_eligible_time >
|
||||||
|
block_group->discard_eligible_time)
|
||||||
|
ret_block_group = block_group;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret_block_group;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* peek_discard_list - wrap find_next_block_group()
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @discard_state: the discard_state of the block_group after state management
|
||||||
|
* @discard_index: the discard_index of the block_group after state management
|
||||||
|
*
|
||||||
|
* This wraps find_next_block_group() and sets the block_group to be in use.
|
||||||
|
* discard_state's control flow is managed here. Variables related to
|
||||||
|
* discard_state are reset here as needed (eg discard_cursor). @discard_state
|
||||||
|
* and @discard_index are remembered as it may change while we're discarding,
|
||||||
|
* but we want the discard to execute in the context determined here.
|
||||||
|
*/
|
||||||
|
static struct btrfs_block_group *peek_discard_list(
|
||||||
|
struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
enum btrfs_discard_state *discard_state,
|
||||||
|
int *discard_index)
|
||||||
|
{
|
||||||
|
struct btrfs_block_group *block_group;
|
||||||
|
const u64 now = ktime_get_ns();
|
||||||
|
|
||||||
|
spin_lock(&discard_ctl->lock);
|
||||||
|
again:
|
||||||
|
block_group = find_next_block_group(discard_ctl, now);
|
||||||
|
|
||||||
|
if (block_group && now > block_group->discard_eligible_time) {
|
||||||
|
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
|
||||||
|
block_group->used != 0) {
|
||||||
|
if (btrfs_is_block_group_data_only(block_group))
|
||||||
|
__add_to_discard_list(discard_ctl, block_group);
|
||||||
|
else
|
||||||
|
list_del_init(&block_group->discard_list);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
|
||||||
|
block_group->discard_cursor = block_group->start;
|
||||||
|
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
|
||||||
|
}
|
||||||
|
discard_ctl->block_group = block_group;
|
||||||
|
*discard_state = block_group->discard_state;
|
||||||
|
*discard_index = block_group->discard_index;
|
||||||
|
} else {
|
||||||
|
block_group = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
|
||||||
|
return block_group;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_discard_check_filter - updates a block groups filters
|
||||||
|
* @block_group: block group of interest
|
||||||
|
* @bytes: recently freed region size after coalescing
|
||||||
|
*
|
||||||
|
* Async discard maintains multiple lists with progressively smaller filters
|
||||||
|
* to prioritize discarding based on size. Should a free space that matches
|
||||||
|
* a larger filter be returned to the free_space_cache, prioritize that discard
|
||||||
|
* by moving @block_group to the proper filter.
|
||||||
|
*/
|
||||||
|
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
|
||||||
|
u64 bytes)
|
||||||
|
{
|
||||||
|
struct btrfs_discard_ctl *discard_ctl;
|
||||||
|
|
||||||
|
if (!block_group ||
|
||||||
|
!btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
|
||||||
|
return;
|
||||||
|
|
||||||
|
discard_ctl = &block_group->fs_info->discard_ctl;
|
||||||
|
|
||||||
|
if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
|
||||||
|
bytes >= discard_minlen[block_group->discard_index - 1]) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
remove_from_discard_list(discard_ctl, block_group);
|
||||||
|
|
||||||
|
for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
|
||||||
|
i++) {
|
||||||
|
if (bytes >= discard_minlen[i]) {
|
||||||
|
block_group->discard_index = i;
|
||||||
|
add_to_discard_list(discard_ctl, block_group);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_update_discard_index - moves a block group along the discard lists
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @block_group: block_group of interest
|
||||||
|
*
|
||||||
|
* Increment @block_group's discard_index. If it falls of the list, let it be.
|
||||||
|
* Otherwise add it back to the appropriate list.
|
||||||
|
*/
|
||||||
|
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
block_group->discard_index++;
|
||||||
|
if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
|
||||||
|
block_group->discard_index = 1;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
add_to_discard_list(discard_ctl, block_group);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_discard_cancel_work - remove a block_group from the discard lists
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @block_group: block_group of interest
|
||||||
|
*
|
||||||
|
* This removes @block_group from the discard lists. If necessary, it waits on
|
||||||
|
* the current work and then reschedules the delayed work.
|
||||||
|
*/
|
||||||
|
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
if (remove_from_discard_list(discard_ctl, block_group)) {
|
||||||
|
cancel_delayed_work_sync(&discard_ctl->work);
|
||||||
|
btrfs_discard_schedule_work(discard_ctl, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_discard_queue_work - handles queuing the block_groups
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @block_group: block_group of interest
|
||||||
|
*
|
||||||
|
* This maintains the LRU order of the discard lists.
|
||||||
|
*/
|
||||||
|
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (block_group->used == 0)
|
||||||
|
add_to_discard_unused_list(discard_ctl, block_group);
|
||||||
|
else
|
||||||
|
add_to_discard_list(discard_ctl, block_group);
|
||||||
|
|
||||||
|
if (!delayed_work_pending(&discard_ctl->work))
|
||||||
|
btrfs_discard_schedule_work(discard_ctl, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_discard_schedule_work - responsible for scheduling the discard work
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @override: override the current timer
|
||||||
|
*
|
||||||
|
* Discards are issued by a delayed workqueue item. @override is used to
|
||||||
|
* update the current delay as the baseline delay interval is reevaluated on
|
||||||
|
* transaction commit. This is also maxed with any other rate limit.
|
||||||
|
*/
|
||||||
|
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
bool override)
|
||||||
|
{
|
||||||
|
struct btrfs_block_group *block_group;
|
||||||
|
const u64 now = ktime_get_ns();
|
||||||
|
|
||||||
|
spin_lock(&discard_ctl->lock);
|
||||||
|
|
||||||
|
if (!btrfs_run_discard_work(discard_ctl))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
if (!override && delayed_work_pending(&discard_ctl->work))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
block_group = find_next_block_group(discard_ctl, now);
|
||||||
|
if (block_group) {
|
||||||
|
unsigned long delay = discard_ctl->delay;
|
||||||
|
u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A single delayed workqueue item is responsible for
|
||||||
|
* discarding, so we can manage the bytes rate limit by keeping
|
||||||
|
* track of the previous discard.
|
||||||
|
*/
|
||||||
|
if (kbps_limit && discard_ctl->prev_discard) {
|
||||||
|
u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
|
||||||
|
u64 bps_delay = div64_u64(discard_ctl->prev_discard *
|
||||||
|
MSEC_PER_SEC, bps_limit);
|
||||||
|
|
||||||
|
delay = max(delay, msecs_to_jiffies(bps_delay));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This timeout is to hopefully prevent immediate discarding
|
||||||
|
* in a recently allocated block group.
|
||||||
|
*/
|
||||||
|
if (now < block_group->discard_eligible_time) {
|
||||||
|
u64 bg_timeout = block_group->discard_eligible_time - now;
|
||||||
|
|
||||||
|
delay = max(delay, nsecs_to_jiffies(bg_timeout));
|
||||||
|
}
|
||||||
|
|
||||||
|
mod_delayed_work(discard_ctl->discard_workers,
|
||||||
|
&discard_ctl->work, delay);
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
spin_unlock(&discard_ctl->lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* btrfs_finish_discard_pass - determine next step of a block_group
|
||||||
|
* @discard_ctl: discard control
|
||||||
|
* @block_group: block_group of interest
|
||||||
|
*
|
||||||
|
* This determines the next step for a block group after it's finished going
|
||||||
|
* through a pass on a discard list. If it is unused and fully trimmed, we can
|
||||||
|
* mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
|
||||||
|
* the appropriate filter list or let it fall off.
|
||||||
|
*/
|
||||||
|
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
|
||||||
|
struct btrfs_block_group *block_group)
|
||||||
|
{
|
||||||
|
remove_from_discard_list(discard_ctl, block_group);
|
||||||
|
|
||||||
|
if (block_group->used == 0) {
|
||||||
|
if (btrfs_is_free_space_trimmed(block_group))
|
||||||
|
btrfs_mark_bg_unused(block_group);
|
||||||
|
else
|
||||||
|
add_to_discard_unused_list(discard_ctl, block_group);
|
||||||
|
} else {
|
||||||
|
btrfs_update_discard_index(discard_ctl, block_group);
|
||||||
|
}
|
||||||
|
}
/**
 * btrfs_discard_workfn - discard work function
 * @work: work
 *
 * This finds the next block_group to start discarding and then discards a
 * single region. It does this in a two-pass fashion: first extents and second
 * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index);
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous level's minimum discard length as the max
		 * length filter. In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
				block_group->discard_cursor,
				btrfs_block_group_end(block_group),
				minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	discard_ctl->prev_discard = trimmed;

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	spin_lock(&discard_ctl->lock);
	discard_ctl->block_group = NULL;
	spin_unlock(&discard_ctl->lock);

	btrfs_discard_schedule_work(discard_ctl, false);
}
/**
 * btrfs_run_discard_work - determines if async discard should be running
 * @discard_ctl: discard control
 *
 * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 */
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
						     struct btrfs_fs_info,
						     discard_ctl);

	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
/**
 * btrfs_discard_calc_delay - recalculate the base delay
 * @discard_ctl: discard control
 *
 * Recalculate the base delay which is based off the total number of
 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
{
	s32 discardable_extents;
	s64 discardable_bytes;
	u32 iops_limit;
	unsigned long delay;
	unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;

	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
	if (!discardable_extents)
		return;

	spin_lock(&discard_ctl->lock);

	/*
	 * The following is to fix a potential -1 discrepancy that we're not
	 * sure how to reproduce. But given that this is the only place that
	 * utilizes these numbers and this is only called from
	 * btrfs_finish_extent_commit() which is synchronized, we can correct
	 * here.
	 */
	if (discardable_extents < 0)
		atomic_add(-discardable_extents,
			   &discard_ctl->discardable_extents);

	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
	if (discardable_bytes < 0)
		atomic64_add(-discardable_bytes,
			     &discard_ctl->discardable_bytes);

	if (discardable_extents <= 0) {
		spin_unlock(&discard_ctl->lock);
		return;
	}

	iops_limit = READ_ONCE(discard_ctl->iops_limit);
	if (iops_limit)
		lower_limit = max_t(unsigned long, lower_limit,
				    MSEC_PER_SEC / iops_limit);

	delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
	delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
	discard_ctl->delay = msecs_to_jiffies(delay);

	spin_unlock(&discard_ctl->lock);
}
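In other words, the base delay spreads the current backlog of discardable extents over a fixed target window, then clamps the result between a floor (the minimum delay, or one derived from iops_limit) and a ceiling. A standalone sketch of that clamp; the constants here assume this series' defaults (a six-hour target, a 1ms floor, a 1s ceiling) and should be checked against the real BTRFS_DISCARD_* definitions:

#include <stdint.h>
#include <stdio.h>

/* Assumed defaults; see discard.h for the authoritative values. */
#define DISCARD_TARGET_MSEC	(6ULL * 60 * 60 * 1000)	/* spread over ~6h */
#define DISCARD_MIN_DELAY_MSEC	1ULL
#define DISCARD_MAX_DELAY_MSEC	1000ULL

static uint64_t calc_delay_msec(int64_t discardable_extents, uint32_t iops_limit)
{
	uint64_t lower = DISCARD_MIN_DELAY_MSEC;
	uint64_t delay;

	if (discardable_extents <= 0)
		return 0;
	if (iops_limit && 1000 / iops_limit > lower)
		lower = 1000 / iops_limit;	/* at most iops_limit discards/sec */

	delay = DISCARD_TARGET_MSEC / (uint64_t)discardable_extents;
	if (delay < lower)
		delay = lower;
	if (delay > DISCARD_MAX_DELAY_MSEC)
		delay = DISCARD_MAX_DELAY_MSEC;
	return delay;
}

int main(void)
{
	/* 100k extents -> 216ms between discards; 10 extents -> capped at 1s */
	printf("%llu %llu\n",
	       (unsigned long long)calc_delay_msec(100000, 0),
	       (unsigned long long)calc_delay_msec(10, 0));
	return 0;
}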
/**
 * btrfs_discard_update_discardable - propagate discard counters
 * @block_group: block_group of interest
 * @ctl: free_space_ctl of @block_group
 *
 * This propagates deltas of counters up to the discard_ctl. It maintains a
 * current counter and a previous counter passing the delta up to the global
 * stat. Then the current counter value becomes the previous counter value.
 */
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
				      struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_discard_ctl *discard_ctl;
	s32 extents_delta;
	s64 bytes_delta;

	if (!block_group ||
	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
	    !btrfs_is_block_group_data_only(block_group))
		return;

	discard_ctl = &block_group->fs_info->discard_ctl;

	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
			ctl->discardable_extents[BTRFS_STAT_PREV];
	if (extents_delta) {
		atomic_add(extents_delta, &discard_ctl->discardable_extents);
		ctl->discardable_extents[BTRFS_STAT_PREV] =
			ctl->discardable_extents[BTRFS_STAT_CURR];
	}

	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
		      ctl->discardable_bytes[BTRFS_STAT_PREV];
	if (bytes_delta) {
		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
		ctl->discardable_bytes[BTRFS_STAT_PREV] =
			ctl->discardable_bytes[BTRFS_STAT_CURR];
	}
}
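This current/previous two-slot pattern is what lets per-block-group counters feed one global atomic without rescanning: only the delta since the last propagation is pushed up. A generic standalone sketch of the idea, with hypothetical names:

#include <stdint.h>
#include <stdio.h>

enum { STAT_CURR, STAT_PREV, STAT_NR_ENTRIES };	/* mirrors BTRFS_STAT_* */

struct local_stats {
	int64_t extents[STAT_NR_ENTRIES];
};

/* Push only the change since the last propagation into the global total. */
static void propagate_delta(struct local_stats *local, int64_t *global_total)
{
	int64_t delta = local->extents[STAT_CURR] - local->extents[STAT_PREV];

	if (delta) {
		*global_total += delta;	/* atomic_add() in the kernel code */
		local->extents[STAT_PREV] = local->extents[STAT_CURR];
	}
}

int main(void)
{
	struct local_stats bg = { .extents = { 5, 0 } };
	int64_t total = 0;

	propagate_delta(&bg, &total);	/* total becomes 5 */
	bg.extents[STAT_CURR] = 3;	/* two extents merged away locally */
	propagate_delta(&bg, &total);	/* total becomes 3 */
	printf("%lld\n", (long long)total);
	return 0;
}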
/**
 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 * @fs_info: fs_info of interest
 *
 * The unused_bgs list needs to be punted to the discard lists because the
 * order of operations is changed. In the normal synchronous discard path, the
 * block groups are trimmed via a single large trim in transaction commit. This
 * is ultimately what we are trying to avoid with asynchronous discard. Thus,
 * it must be done before going down the unused_bgs path.
 */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
{
	struct btrfs_block_group *block_group, *next;

	spin_lock(&fs_info->unused_bgs_lock);
	/* We enabled async discard, so punt all to the queue */
	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
				 bg_list) {
		list_del_init(&block_group->bg_list);
		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
	}
	spin_unlock(&fs_info->unused_bgs_lock);
}

/**
 * btrfs_discard_purge_list - purge discard lists
 * @discard_ctl: discard control
 *
 * If we are disabling async discard, we may have intercepted block groups that
 * are completely free and ready for the unused_bgs path. As discarding will
 * now happen in transaction commit or not at all, we can safely mark the
 * corresponding block groups as unused and they will be sent on their merry
 * way to the unused_bgs list.
 */
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
{
	struct btrfs_block_group *block_group, *next;
	int i;

	spin_lock(&discard_ctl->lock);
	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
		list_for_each_entry_safe(block_group, next,
					 &discard_ctl->discard_list[i],
					 discard_list) {
			list_del_init(&block_group->discard_list);
			spin_unlock(&discard_ctl->lock);
			if (block_group->used == 0)
				btrfs_mark_bg_unused(block_group);
			spin_lock(&discard_ctl->lock);
		}
	}
	spin_unlock(&discard_ctl->lock);
}
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
{
	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
		btrfs_discard_cleanup(fs_info);
		return;
	}

	btrfs_discard_punt_unused_bgs_list(fs_info);

	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
{
	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
}

void btrfs_discard_init(struct btrfs_fs_info *fs_info)
{
	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
	int i;

	spin_lock_init(&discard_ctl->lock);
	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);

	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);

	discard_ctl->prev_discard = 0;
	atomic_set(&discard_ctl->discardable_extents, 0);
	atomic64_set(&discard_ctl->discardable_bytes, 0);
	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
	discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
	discard_ctl->kbps_limit = 0;
	discard_ctl->discard_extent_bytes = 0;
	discard_ctl->discard_bitmap_bytes = 0;
	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
}

void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
{
	btrfs_discard_stop(fs_info);
	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
	btrfs_discard_purge_list(&fs_info->discard_ctl);
}
fs/btrfs/discard.h (new file, 41 lines):
// SPDX-License-Identifier: GPL-2.0

#ifndef BTRFS_DISCARD_H
#define BTRFS_DISCARD_H

#include <linux/sizes.h>

struct btrfs_fs_info;
struct btrfs_discard_ctl;
struct btrfs_block_group;

/* Discard size limits */
#define BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE	(SZ_64M)
#define BTRFS_ASYNC_DISCARD_MAX_FILTER		(SZ_1M)
#define BTRFS_ASYNC_DISCARD_MIN_FILTER		(SZ_32K)

/* List operations */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group, u64 bytes);

/* Work operations */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
			       struct btrfs_block_group *block_group);
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
			      struct btrfs_block_group *block_group);
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
				 bool override);
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);

/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
				      struct btrfs_free_space_ctl *ctl);

/* Setup/cleanup operations */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
void btrfs_discard_resume(struct btrfs_fs_info *fs_info);
void btrfs_discard_stop(struct btrfs_fs_info *fs_info);
void btrfs_discard_init(struct btrfs_fs_info *fs_info);
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info);

#endif
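These limits shape the discard lists: single discards are capped at the default max size, and the MAX/MIN filter values bound which free-extent lengths each list accepts. A hedged sketch of that bucketing — the indices here are purely illustrative, not the kernel's BTRFS_DISCARD_INDEX_* values:

#include <stdint.h>
#include <stdio.h>

#define ASYNC_DISCARD_MAX_FILTER	(1024 * 1024)	/* SZ_1M */
#define ASYNC_DISCARD_MIN_FILTER	(32 * 1024)	/* SZ_32K */

/* Illustrative bucketing only; the real lists live in discard.c. */
static int discard_bucket(uint64_t len)
{
	if (len >= ASYNC_DISCARD_MAX_FILTER)
		return 0;	/* large extents: always worth discarding */
	if (len >= ASYNC_DISCARD_MIN_FILTER)
		return 1;	/* mid-size extents */
	return -1;		/* below the minimum filter: skipped */
}

int main(void)
{
	printf("%d %d %d\n", discard_bucket(4 << 20),
	       discard_bucket(64 << 10), discard_bucket(4 << 10));
	return 0;
}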
fs/btrfs/disk-io.c:

@@ -41,6 +41,7 @@
 #include "tree-checker.h"
 #include "ref-verify.h"
 #include "block-group.h"
+#include "discard.h"
 
 #define BTRFS_SUPER_FLAG_SUPP	(BTRFS_HEADER_FLAG_WRITTEN |\
 				 BTRFS_HEADER_FLAG_RELOC |\
@@ -202,8 +203,8 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
  * that covers the entire device
  */
 struct extent_map *btree_get_extent(struct btrfs_inode *inode,
-		struct page *page, size_t pg_offset, u64 start, u64 len,
-		int create)
+		struct page *page, size_t pg_offset,
+		u64 start, u64 len)
 {
 	struct extent_map_tree *em_tree = &inode->extent_tree;
 	struct extent_map *em;
@@ -1953,6 +1954,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_destroy_workqueue(fs_info->readahead_workers);
 	btrfs_destroy_workqueue(fs_info->flush_workers);
 	btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+	if (fs_info->discard_ctl.discard_workers)
+		destroy_workqueue(fs_info->discard_ctl.discard_workers);
 	/*
 	 * Now that all other work queues are destroyed, we can safely destroy
 	 * the queues used for metadata I/O, since tasks from those other work
@@ -2148,6 +2151,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
 			      max_active, 2);
 	fs_info->qgroup_rescan_workers =
 		btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
+	fs_info->discard_ctl.discard_workers =
+		alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
 
 	if (!(fs_info->workers && fs_info->delalloc_workers &&
 	      fs_info->flush_workers &&
@@ -2158,7 +2163,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
 	      fs_info->endio_freespace_worker && fs_info->rmw_workers &&
 	      fs_info->caching_workers && fs_info->readahead_workers &&
 	      fs_info->fixup_workers && fs_info->delayed_workers &&
-	      fs_info->qgroup_rescan_workers)) {
+	      fs_info->qgroup_rescan_workers &&
+	      fs_info->discard_ctl.discard_workers)) {
 		return -ENOMEM;
 	}
 
@@ -2792,6 +2798,7 @@ int __cold open_ctree(struct super_block *sb,
 
 	btrfs_init_dev_replace_locks(fs_info);
 	btrfs_init_qgroup(fs_info);
+	btrfs_discard_init(fs_info);
 
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -3082,20 +3089,13 @@ int __cold open_ctree(struct super_block *sb,
 
 	btrfs_free_extra_devids(fs_devices, 1);
 
-	ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+	ret = btrfs_sysfs_add_fsid(fs_devices);
 	if (ret) {
 		btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
 				ret);
 		goto fail_block_groups;
 	}
 
-	ret = btrfs_sysfs_add_device(fs_devices);
-	if (ret) {
-		btrfs_err(fs_info, "failed to init sysfs device interface: %d",
-				ret);
-		goto fail_fsdev_sysfs;
-	}
-
 	ret = btrfs_sysfs_add_mounted(fs_info);
 	if (ret) {
 		btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
@@ -3262,6 +3262,7 @@ int __cold open_ctree(struct super_block *sb,
 	}
 
 	btrfs_qgroup_rescan_resume(fs_info);
+	btrfs_discard_resume(fs_info);
 
 	if (!fs_info->uuid_root) {
 		btrfs_info(fs_info, "creating UUID tree");
@@ -3978,6 +3979,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
 
 	cancel_work_sync(&fs_info->async_reclaim_work);
 
+	/* Cancel or finish ongoing discard work */
+	btrfs_discard_cleanup(fs_info);
+
 	if (!sb_rdonly(fs_info->sb)) {
 		/*
 		 * The cleaner kthread is stopped, so do one final pass over
@@ -4026,11 +4030,18 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
 	btrfs_stop_all_workers(fs_info);
 
-	btrfs_free_block_groups(fs_info);
-
 	clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
 	free_root_pointers(fs_info, true);
 
+	/*
+	 * We must free the block groups after dropping the fs_roots as we could
+	 * have had an IO error and have left over tree log blocks that aren't
+	 * cleaned up until the fs roots are freed. This makes the block group
+	 * accounting appear to be wrong because there's pending reserved bytes,
+	 * so make sure we do the block group cleanup afterwards.
+	 */
+	btrfs_free_block_groups(fs_info);
+
 	iput(fs_info->btree_inode);
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
fs/btrfs/disk-io.h:

@@ -134,8 +134,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
 int btree_lock_page_hook(struct page *page, void *data,
 			 void (*flush_fn)(void *));
 struct extent_map *btree_get_extent(struct btrfs_inode *inode,
-		struct page *page, size_t pg_offset, u64 start, u64 len,
-		int create);
+		struct page *page, size_t pg_offset,
+		u64 start, u64 len);
 int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
 int __init btrfs_end_io_wq_init(void);
 void __cold btrfs_end_io_wq_exit(void);
fs/btrfs/extent-tree.c:

@@ -32,6 +32,7 @@
 #include "block-rsv.h"
 #include "delalloc-space.h"
 #include "block-group.h"
+#include "discard.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -2923,7 +2924,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 			break;
 		}
 
-		if (btrfs_test_opt(fs_info, DISCARD))
+		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
 			ret = btrfs_discard_extent(fs_info, start,
 						   end + 1 - start, NULL);
 
@@ -2934,6 +2935,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 		cond_resched();
 	}
 
+	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
+		btrfs_discard_calc_delay(&fs_info->discard_ctl);
+		btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
+	}
+
 	/*
 	 * Transaction is finished. We don't need the lock anymore. We
 	 * do need to clean up the block groups in case of a transaction
@@ -3438,7 +3444,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
  */
 struct find_free_extent_ctl {
 	/* Basic allocation info */
-	u64 ram_bytes;
 	u64 num_bytes;
 	u64 empty_size;
 	u64 flags;
@@ -3810,7 +3815,6 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
 
 	WARN_ON(num_bytes < fs_info->sectorsize);
 
-	ffe_ctl.ram_bytes = ram_bytes;
 	ffe_ctl.num_bytes = num_bytes;
 	ffe_ctl.empty_size = empty_size;
 	ffe_ctl.flags = flags;
@@ -4165,12 +4169,10 @@ again:
 	return ret;
 }
 
-static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
-				u64 start, u64 len,
-				int pin, int delalloc)
+int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
+			       u64 start, u64 len, int delalloc)
 {
 	struct btrfs_block_group *cache;
-	int ret = 0;
 
 	cache = btrfs_lookup_block_group(fs_info, start);
 	if (!cache) {
@@ -4179,32 +4181,30 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
 		return -ENOSPC;
 	}
 
-	if (pin)
-		pin_down_extent(cache, start, len, 1);
-	else {
-		if (btrfs_test_opt(fs_info, DISCARD))
-			ret = btrfs_discard_extent(fs_info, start, len, NULL);
-		btrfs_add_free_space(cache, start, len);
-		btrfs_free_reserved_bytes(cache, len, delalloc);
-		trace_btrfs_reserved_extent_free(fs_info, start, len);
-	}
+	btrfs_add_free_space(cache, start, len);
+	btrfs_free_reserved_bytes(cache, len, delalloc);
+	trace_btrfs_reserved_extent_free(fs_info, start, len);
 
 	btrfs_put_block_group(cache);
-	return ret;
+	return 0;
+}
+
+int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
+{
+	struct btrfs_block_group *cache;
+	int ret = 0;
+
+	cache = btrfs_lookup_block_group(fs_info, start);
+	if (!cache) {
+		btrfs_err(fs_info, "unable to find block group for %llu", start);
+		return -ENOSPC;
+	}
+
+	ret = pin_down_extent(cache, start, len, 1);
+	btrfs_put_block_group(cache);
+	return ret;
 }
 
-int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
-			       u64 start, u64 len, int delalloc)
-{
-	return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
-}
-
-int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
-				       u64 start, u64 len)
-{
-	return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
-}
-
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 				      u64 parent, u64 root_objectid,
 				      u64 flags, u64 owner, u64 offset,
fs/btrfs/extent_io.c:

@@ -3043,7 +3043,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
 		*em_cached = NULL;
 	}
 
-	em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
+	em = get_extent(BTRFS_I(inode), page, pg_offset, start, len);
 	if (em_cached && !IS_ERR_OR_NULL(em)) {
 		BUG_ON(*em_cached);
 		refcount_inc(&em->refs);
@@ -3455,11 +3455,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 	update_nr_written(wbc, nr_written + 1);
 
 	end = page_end;
-	if (i_size <= start) {
-		btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
-		goto done;
-	}
-
 	blocksize = inode->i_sb->s_blocksize;
 
 	while (cur <= end) {
@@ -3471,8 +3466,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 					     page_end, 1);
 			break;
 		}
-		em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
-				      end - cur + 1, 1);
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur,
+				      end - cur + 1);
 		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
 			ret = PTR_ERR_OR_ZERO(em);
@@ -3497,22 +3492,11 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 		 */
 		if (compressed || block_start == EXTENT_MAP_HOLE ||
 		    block_start == EXTENT_MAP_INLINE) {
-			/*
-			 * end_io notification does not happen here for
-			 * compressed extents
-			 */
-			if (!compressed)
-				btrfs_writepage_endio_finish_ordered(page, cur,
-							    cur + iosize - 1,
-							    1);
-			else if (compressed) {
-				/* we don't want to end_page_writeback on
-				 * a compressed extent. this happens
-				 * elsewhere
-				 */
+			if (compressed)
 				nr++;
-			}
+			else
+				btrfs_writepage_endio_finish_ordered(page, cur,
+						cur + iosize - 1, 1);
 			cur += iosize;
 			pg_offset += iosize;
 			continue;
@@ -3540,7 +3524,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
 		pg_offset += iosize;
 		nr++;
 	}
-done:
 	*nr_ret = nr;
 	return ret;
 }
@@ -3562,7 +3545,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 page_end = start + PAGE_SIZE - 1;
 	int ret;
 	int nr = 0;
-	size_t pg_offset = 0;
+	size_t pg_offset;
 	loff_t i_size = i_size_read(inode);
 	unsigned long end_index = i_size >> PAGE_SHIFT;
 	unsigned long nr_written = 0;
@@ -3591,14 +3574,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		flush_dcache_page(page);
 	}
 
-	pg_offset = 0;
-
 	set_page_extent_mapped(page);
 
 	if (!epd->extent_locked) {
 		ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
 		if (ret == 1)
-			goto done_unlocked;
+			return 0;
 		if (ret)
 			goto done;
 	}
@@ -3606,7 +3587,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	ret = __extent_writepage_io(inode, page, wbc, epd,
 				    i_size, nr_written, &nr);
 	if (ret == 1)
-		goto done_unlocked;
+		return 0;
 
 done:
 	if (nr == 0) {
@@ -3621,9 +3602,6 @@ done:
 	unlock_page(page);
 	ASSERT(ret <= 0);
 	return ret;
-
-done_unlocked:
-	return 0;
 }
 
 void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
@@ -3941,6 +3919,11 @@ int btree_write_cache_pages(struct address_space *mapping,
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
+		/*
+		 * Start from the beginning does not need to cycle over the
+		 * range, mark it as scanned.
+		 */
+		scanned = (index == 0);
 	} else {
 		index = wbc->range_start >> PAGE_SHIFT;
 		end = wbc->range_end >> PAGE_SHIFT;
@@ -3958,7 +3941,6 @@ retry:
 			tag))) {
 		unsigned i;
 
-		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
@@ -4087,6 +4069,11 @@ static int extent_write_cache_pages(struct address_space *mapping,
 	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
 		end = -1;
+		/*
+		 * Start from the beginning does not need to cycle over the
+		 * range, mark it as scanned.
+		 */
+		scanned = (index == 0);
 	} else {
 		index = wbc->range_start >> PAGE_SHIFT;
 		end = wbc->range_end >> PAGE_SHIFT;
@@ -4120,7 +4107,6 @@ retry:
 			&index, end, tag))) {
 		unsigned i;
 
-		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
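The two writeback hunks above make the same fix twice: a range_cyclic scan that starts at offset 0 already covers the whole range, so "scanned" must be set up front rather than inside the pagevec loop, where an empty lookup would skip the assignment and the scan could wrap repeatedly over missed dirty pages. A standalone sketch of the corrected control flow (hypothetical helpers, not the pagecache API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Trivial stand-in for writing back all dirty pages in [index, end]. */
static void write_dirty_range(uint64_t index, uint64_t end)
{
	printf("writeback [%llu, %llu]\n",
	       (unsigned long long)index, (unsigned long long)end);
}

/*
 * Cyclic writeback: resume from the previous offset and wrap to 0 at
 * most once. Starting at index 0 already covers the full range, so it
 * is marked scanned before the loop runs.
 */
static void write_cache_pages_cyclic(uint64_t start, uint64_t end)
{
	uint64_t index = start;
	bool scanned = (index == 0);

retry:
	write_dirty_range(index, end);
	if (!scanned) {
		scanned = true;
		index = 0;	/* wrap around exactly once */
		goto retry;
	}
}

int main(void)
{
	write_cache_pages_cyclic(42, 1000);	/* [42,1000], then [0,1000] */
	return 0;
}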
fs/btrfs/extent_io.h:

@@ -183,10 +183,8 @@ static inline int extent_compress_type(unsigned long bio_flags)
 struct extent_map_tree;
 
 typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
-					  struct page *page,
-					  size_t pg_offset,
-					  u64 start, u64 len,
-					  int create);
+					  struct page *page, size_t pg_offset,
+					  u64 start, u64 len);
 
 int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
fs/btrfs/file-item.c:

@@ -148,8 +148,19 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
-static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
-				   u64 logical_offset, u8 *dst, int dio)
+/**
+ * btrfs_lookup_bio_sums - Look up checksums for a bio.
+ * @inode: inode that the bio is for.
+ * @bio: bio embedded in btrfs_io_bio.
+ * @offset: Unless (u64)-1, look up checksums for this offset in the file.
+ *          If (u64)-1, use the page offsets from the bio instead.
+ * @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If
+ *       NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead.
+ *
+ * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
+ */
+blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
+				   u64 offset, u8 *dst)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct bio_vec bvec;
@@ -158,8 +169,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
 	struct btrfs_csum_item *item = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_path *path;
+	const bool page_offsets = (offset == (u64)-1);
 	u8 *csum;
-	u64 offset = 0;
 	u64 item_start_offset = 0;
 	u64 item_last_offset = 0;
 	u64 disk_bytenr;
@@ -205,15 +216,13 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
 	}
 
 	disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
-	if (dio)
-		offset = logical_offset;
 
 	bio_for_each_segment(bvec, bio, iter) {
 		page_bytes_left = bvec.bv_len;
 		if (count)
 			goto next;
 
-		if (!dio)
+		if (page_offsets)
 			offset = page_offset(bvec.bv_page) + bvec.bv_offset;
 		count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
 					       csum, nblocks);
@@ -274,7 +283,8 @@ found:
 		csum += count * csum_size;
 		nblocks -= count;
 next:
-		while (count--) {
+		while (count > 0) {
+			count--;
 			disk_bytenr += fs_info->sectorsize;
 			offset += fs_info->sectorsize;
 			page_bytes_left -= fs_info->sectorsize;
@@ -285,18 +295,7 @@ next:
 
 	WARN_ON_ONCE(count);
 	btrfs_free_path(path);
-	return 0;
-}
-
-blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
-				   u8 *dst)
-{
-	return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
-}
-
-blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
-{
-	return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
+	return BLK_STS_OK;
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
@@ -483,8 +482,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
 					 - 1);
 
 	for (i = 0; i < nr_sectors; i++) {
-		if (offset >= ordered->file_offset + ordered->len ||
+		if (offset >= ordered->file_offset + ordered->num_bytes ||
 		    offset < ordered->file_offset) {
 			unsigned long bytes_left;
 
 			sums->len = this_sum_bytes;
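With the dio wrapper folded into btrfs_lookup_bio_sums(), the buffered and direct I/O flavours are now selected by the offset argument alone: (u64)-1 means derive offsets from the bio's pages, anything else is a caller-supplied file offset. A standalone sketch of that sentinel-parameter convention (names are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define USE_PAGE_OFFSETS ((uint64_t)-1)	/* sentinel, like the (u64)-1 above */

/* One entry point; the sentinel offset selects the buffered behaviour. */
static void lookup_sums(uint64_t offset)
{
	const bool page_offsets = (offset == USE_PAGE_OFFSETS);

	if (page_offsets)
		printf("deriving offsets from the bio's pages\n");
	else
		printf("using caller-supplied offset %llu\n",
		       (unsigned long long)offset);
}

int main(void)
{
	lookup_sums(USE_PAGE_OFFSETS);	/* replaces the old buffered wrapper */
	lookup_sums(4096);		/* replaces the old _dio wrapper */
	return 0;
}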
fs/btrfs/file.c:

@@ -477,8 +477,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
 		u64 em_len;
 		int ret = 0;
 
-		em = btrfs_get_extent(inode, NULL, 0, search_start,
-				      search_len, 0);
+		em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
 		if (IS_ERR(em))
 			return PTR_ERR(em);
 
@@ -1501,7 +1500,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 		ordered = btrfs_lookup_ordered_range(inode, start_pos,
 						     last_pos - start_pos + 1);
 		if (ordered &&
-		    ordered->file_offset + ordered->len > start_pos &&
+		    ordered->file_offset + ordered->num_bytes > start_pos &&
 		    ordered->file_offset <= last_pos) {
 			unlock_extent_cached(&inode->io_tree, start_pos,
 					     last_pos, cached_state);
@@ -2390,7 +2389,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
 
 	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
 			      round_down(*start, fs_info->sectorsize),
-			      round_up(*len, fs_info->sectorsize), 0);
+			      round_up(*len, fs_info->sectorsize));
 	if (IS_ERR(em))
 		return PTR_ERR(em);
 
@@ -2426,7 +2425,7 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
 		 * we need to try again.
 		 */
 		if ((!ordered ||
-		    (ordered->file_offset + ordered->len <= lockstart ||
+		    (ordered->file_offset + ordered->num_bytes <= lockstart ||
 		     ordered->file_offset > lockend)) &&
 		     !filemap_range_has_page(inode->i_mapping,
 					     lockstart, lockend)) {
@@ -2957,7 +2956,7 @@ static int btrfs_zero_range_check_range_boundary(struct inode *inode,
 	int ret;
 
 	offset = round_down(offset, sectorsize);
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
 
@@ -2990,8 +2989,8 @@ static int btrfs_zero_range(struct inode *inode,
 
 	inode_dio_wait(inode);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
-			      alloc_start, alloc_end - alloc_start, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+			      alloc_end - alloc_start);
 	if (IS_ERR(em)) {
 		ret = PTR_ERR(em);
 		goto out;
@@ -3034,8 +3033,8 @@ static int btrfs_zero_range(struct inode *inode,
 
 	if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
 	    BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
-				      alloc_start, sectorsize, 0);
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+				      sectorsize);
 		if (IS_ERR(em)) {
 			ret = PTR_ERR(em);
 			goto out;
@@ -3248,7 +3247,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
 
 	if (ordered &&
-	    ordered->file_offset + ordered->len > alloc_start &&
+	    ordered->file_offset + ordered->num_bytes > alloc_start &&
 	    ordered->file_offset < alloc_end) {
 		btrfs_put_ordered_extent(ordered);
 		unlock_extent_cached(&BTRFS_I(inode)->io_tree,
@@ -3273,7 +3272,7 @@ static long btrfs_fallocate(struct file *file, int mode,
 	INIT_LIST_HEAD(&reserve_list);
 	while (cur_offset < alloc_end) {
 		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
-				      alloc_end - cur_offset, 0);
+				      alloc_end - cur_offset);
 		if (IS_ERR(em)) {
 			ret = PTR_ERR(em);
 			break;
(File diff suppressed because it is too large.)
fs/btrfs/free-space-cache.h:

@@ -6,6 +6,20 @@
 #ifndef BTRFS_FREE_SPACE_CACHE_H
 #define BTRFS_FREE_SPACE_CACHE_H
 
+/*
+ * This is the trim state of an extent or bitmap.
+ *
+ * BTRFS_TRIM_STATE_TRIMMING is special and used to maintain the state of a
+ * bitmap as we may need several trims to fully trim a single bitmap entry.
+ * This is reset should any free space other than trimmed space be added to the
+ * bitmap.
+ */
+enum btrfs_trim_state {
+	BTRFS_TRIM_STATE_UNTRIMMED,
+	BTRFS_TRIM_STATE_TRIMMED,
+	BTRFS_TRIM_STATE_TRIMMING,
+};
+
 struct btrfs_free_space {
 	struct rb_node offset_index;
 	u64 offset;
@@ -13,8 +27,21 @@ struct btrfs_free_space {
 	u64 max_extent_size;
 	unsigned long *bitmap;
 	struct list_head list;
+	enum btrfs_trim_state trim_state;
+	s32 bitmap_extents;
 };
 
+static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
+{
+	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMED);
+}
+
+static inline bool btrfs_free_space_trimming_bitmap(
+					    struct btrfs_free_space *info)
+{
+	return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
+}
+
 struct btrfs_free_space_ctl {
 	spinlock_t tree_lock;
 	struct rb_root free_space_offset;
@@ -24,6 +51,8 @@ struct btrfs_free_space_ctl {
 	int total_bitmaps;
 	int unit;
 	u64 start;
+	s32 discardable_extents[BTRFS_STAT_NR_ENTRIES];
+	s64 discardable_bytes[BTRFS_STAT_NR_ENTRIES];
 	const struct btrfs_free_space_op *op;
 	void *private;
 	struct mutex cache_writeout_mutex;
@@ -83,13 +112,17 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
 int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
 			   struct btrfs_free_space_ctl *ctl,
-			   u64 bytenr, u64 size);
+			   u64 bytenr, u64 size,
+			   enum btrfs_trim_state trim_state);
 int btrfs_add_free_space(struct btrfs_block_group *block_group,
 			 u64 bytenr, u64 size);
+int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
+				       u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group *block_group,
 			    u64 bytenr, u64 size);
 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
 void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
+bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
 u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
 			       u64 offset, u64 bytes, u64 empty_size,
 			       u64 *max_extent_size);
@@ -108,6 +141,12 @@ int btrfs_return_cluster_to_free_space(
 			struct btrfs_free_cluster *cluster);
 int btrfs_trim_block_group(struct btrfs_block_group *block_group,
 			   u64 *trimmed, u64 start, u64 end, u64 minlen);
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   bool async);
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   u64 maxlen, bool async);
 
 /* Support functions for running our sanity tests */
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
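The trim_state field gives each free-space entry a three-way lifecycle — untrimmed, trimmed, or mid-trim for bitmaps — and the inline helpers above are the intended tests. A minimal standalone illustration (mirrored types, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

enum trim_state {	/* mirrors enum btrfs_trim_state */
	TRIM_STATE_UNTRIMMED,
	TRIM_STATE_TRIMMED,
	TRIM_STATE_TRIMMING,
};

struct free_space {
	enum trim_state trim_state;
};

static bool free_space_trimmed(const struct free_space *info)
{
	return info->trim_state == TRIM_STATE_TRIMMED;
}

int main(void)
{
	struct free_space info = { .trim_state = TRIM_STATE_TRIMMING };

	/* A bitmap mid-trim is not "trimmed": re-adding untrimmed space
	 * resets it to UNTRIMMED, as the header comment describes. */
	printf("trimmed: %d\n", free_space_trimmed(&info));
	return 0;
}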
fs/btrfs/inode-map.c:

@@ -107,7 +107,7 @@ again:
 
 		if (last != (u64)-1 && last + 1 != key.objectid) {
 			__btrfs_add_free_space(fs_info, ctl, last + 1,
-					       key.objectid - last - 1);
+					       key.objectid - last - 1, 0);
 			wake_up(&root->ino_cache_wait);
 		}
 
@@ -118,7 +118,7 @@ next:
 
 	if (last < root->highest_objectid - 1) {
 		__btrfs_add_free_space(fs_info, ctl, last + 1,
-				       root->highest_objectid - last - 1);
+				       root->highest_objectid - last - 1, 0);
 	}
 
 	spin_lock(&root->ino_cache_lock);
@@ -175,7 +175,8 @@ static void start_caching(struct btrfs_root *root)
 	ret = btrfs_find_free_objectid(root, &objectid);
 	if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
 		__btrfs_add_free_space(fs_info, ctl, objectid,
-				       BTRFS_LAST_FREE_OBJECTID - objectid + 1);
+				       BTRFS_LAST_FREE_OBJECTID - objectid + 1,
+				       0);
 		wake_up(&root->ino_cache_wait);
 	}
 
@@ -221,7 +222,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 		return;
 again:
 	if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 	} else {
 		down_write(&fs_info->commit_root_sem);
 		spin_lock(&root->ino_cache_lock);
@@ -234,7 +235,7 @@ again:
 
 		start_caching(root);
 
-		__btrfs_add_free_space(fs_info, pinned, objectid, 1);
+		__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
 
 		up_write(&fs_info->commit_root_sem);
 	}
@@ -281,7 +282,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 		spin_unlock(rbroot_lock);
 		if (count)
 			__btrfs_add_free_space(root->fs_info, ctl,
-					       info->offset, count);
+					       info->offset, count, 0);
 		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 }
fs/btrfs/inode.c (834 lines changed): file diff suppressed because it is too large.
fs/btrfs/ioctl.c:

@@ -1128,7 +1128,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
 
 		/* get the big lock and read metadata off disk */
 		lock_extent_bits(io_tree, start, end, &cached);
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
+		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
 		unlock_extent_cached(io_tree, start, end, &cached);
 
 		if (IS_ERR(em))
fs/btrfs/ordered-data.c:

@@ -20,9 +20,9 @@ static struct kmem_cache *btrfs_ordered_extent_cache;
 
 static u64 entry_end(struct btrfs_ordered_extent *entry)
 {
-	if (entry->file_offset + entry->len < entry->file_offset)
+	if (entry->file_offset + entry->num_bytes < entry->file_offset)
 		return (u64)-1;
-	return entry->file_offset + entry->len;
+	return entry->file_offset + entry->num_bytes;
 }
 
 /* returns NULL if the insertion worked, or it returns the node it did find
@@ -52,14 +52,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
 	return NULL;
 }
 
-static void ordered_data_tree_panic(struct inode *inode, int errno,
-					       u64 offset)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	btrfs_panic(fs_info, errno,
-		    "Inconsistency in ordered tree at offset %llu", offset);
-}
-
 /*
  * look for a given offset in the tree, and if it can't be found return the
  * first lesser offset
@@ -120,7 +112,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
 static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
 {
 	if (file_offset < entry->file_offset ||
-	    entry->file_offset + entry->len <= file_offset)
+	    entry->file_offset + entry->num_bytes <= file_offset)
 		return 0;
 	return 1;
 }
@@ -129,7 +121,7 @@ static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
 			  u64 len)
 {
 	if (file_offset + len <= entry->file_offset ||
-	    entry->file_offset + entry->len <= file_offset)
+	    entry->file_offset + entry->num_bytes <= file_offset)
 		return 0;
 	return 1;
 }
@@ -161,19 +153,14 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 }
 
 /* allocate and add a new ordered_extent into the per-inode tree.
- * file_offset is the logical offset in the file
- *
- * start is the disk block number of an extent already reserved in the
- * extent allocation tree
- *
- * len is the length of the extent
  *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
 static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-				      u64 start, u64 len, u64 disk_len,
-				      int type, int dio, int compress_type)
+				      u64 disk_bytenr, u64 num_bytes,
+				      u64 disk_num_bytes, int type, int dio,
+				      int compress_type)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -187,10 +174,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 		return -ENOMEM;
 
 	entry->file_offset = file_offset;
-	entry->start = start;
-	entry->len = len;
-	entry->disk_len = disk_len;
-	entry->bytes_left = len;
+	entry->disk_bytenr = disk_bytenr;
+	entry->num_bytes = num_bytes;
+	entry->disk_num_bytes = disk_num_bytes;
+	entry->bytes_left = num_bytes;
 	entry->inode = igrab(inode);
 	entry->compress_type = compress_type;
 	entry->truncated_len = (u64)-1;
@@ -198,7 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 	set_bit(type, &entry->flags);
 
 	if (dio) {
-		percpu_counter_add_batch(&fs_info->dio_bytes, len,
+		percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
 					 fs_info->delalloc_batch);
 		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
 	}
@@ -219,7 +206,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 	node = tree_insert(&tree->tree, file_offset,
 			   &entry->rb_node);
 	if (node)
-		ordered_data_tree_panic(inode, -EEXIST, file_offset);
+		btrfs_panic(fs_info, -EEXIST,
+				"inconsistency in ordered tree at offset %llu",
+				file_offset);
 	spin_unlock_irq(&tree->lock);
 
 	spin_lock(&root->ordered_extent_lock);
@@ -247,27 +236,30 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 }
 
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-			     u64 start, u64 len, u64 disk_len, int type)
+			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
+			     int type)
 {
-	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-					  disk_len, type, 0,
+	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+					  num_bytes, disk_num_bytes, type, 0,
 					  BTRFS_COMPRESS_NONE);
 }
 
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
-				 u64 start, u64 len, u64 disk_len, int type)
+				 u64 disk_bytenr, u64 num_bytes,
+				 u64 disk_num_bytes, int type)
 {
-	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-					  disk_len, type, 1,
+	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+					  num_bytes, disk_num_bytes, type, 1,
 					  BTRFS_COMPRESS_NONE);
 }
 
 int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
-				      u64 start, u64 len, u64 disk_len,
-				      int type, int compress_type)
+				      u64 disk_bytenr, u64 num_bytes,
+				      u64 disk_num_bytes, int type,
+				      int compress_type)
 {
-	return __btrfs_add_ordered_extent(inode, file_offset, start, len,
-					  disk_len, type, 0,
+	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
+					  num_bytes, disk_num_bytes, type, 0,
 					  compress_type);
 }
 
@@ -328,8 +320,8 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
 	}
 
 	dec_start = max(*file_offset, entry->file_offset);
-	dec_end = min(*file_offset + io_size, entry->file_offset +
-		      entry->len);
+	dec_end = min(*file_offset + io_size,
+		      entry->file_offset + entry->num_bytes);
 	*file_offset = dec_end;
 	if (dec_start > dec_end) {
 		btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
@@ -471,10 +463,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
||||||
spin_unlock(&btrfs_inode->lock);
|
spin_unlock(&btrfs_inode->lock);
|
||||||
if (root != fs_info->tree_root)
|
if (root != fs_info->tree_root)
|
||||||
btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
|
btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
|
||||||
|
false);
|
||||||
|
|
||||||
if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
|
if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
|
||||||
percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len,
|
percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes,
|
||||||
fs_info->delalloc_batch);
|
fs_info->delalloc_batch);
|
||||||
|
|
||||||
tree = &btrfs_inode->ordered_tree;
|
tree = &btrfs_inode->ordered_tree;
|
||||||
@ -534,8 +527,8 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
|||||||
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
|
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
|
||||||
root_extent_list);
|
root_extent_list);
|
||||||
|
|
||||||
if (range_end <= ordered->start ||
|
if (range_end <= ordered->disk_bytenr ||
|
||||||
ordered->start + ordered->disk_len <= range_start) {
|
ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
|
||||||
list_move_tail(&ordered->root_extent_list, &skipped);
|
list_move_tail(&ordered->root_extent_list, &skipped);
|
||||||
cond_resched_lock(&root->ordered_extent_lock);
|
cond_resched_lock(&root->ordered_extent_lock);
|
||||||
continue;
|
continue;
|
||||||
@ -619,7 +612,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
|
|||||||
int wait)
|
int wait)
|
||||||
{
|
{
|
||||||
u64 start = entry->file_offset;
|
u64 start = entry->file_offset;
|
||||||
u64 end = start + entry->len - 1;
|
u64 end = start + entry->num_bytes - 1;
|
||||||
|
|
||||||
trace_btrfs_ordered_extent_start(inode, entry);
|
trace_btrfs_ordered_extent_start(inode, entry);
|
||||||
|
|
||||||
@ -680,7 +673,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
|||||||
btrfs_put_ordered_extent(ordered);
|
btrfs_put_ordered_extent(ordered);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ordered->file_offset + ordered->len <= start) {
|
if (ordered->file_offset + ordered->num_bytes <= start) {
|
||||||
btrfs_put_ordered_extent(ordered);
|
btrfs_put_ordered_extent(ordered);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
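
The offset_in_entry() and range_overlaps() helpers above treat an ordered extent as the half-open file range [file_offset, file_offset + num_bytes). A minimal standalone sketch of the same interval logic, using stand-in types rather than the kernel structs (the names here are illustrative only):

#include <stdint.h>

/* Illustrative stand-in for the renamed btrfs_ordered_extent fields. */
struct ordered_range {
        uint64_t file_offset;   /* logical offset in the file */
        uint64_t num_bytes;     /* length of the covered file range */
};

/* Mirrors offset_in_entry(): is off inside [start, start + len)? */
static int offset_in_range(const struct ordered_range *r, uint64_t off)
{
        return off >= r->file_offset &&
               off < r->file_offset + r->num_bytes;
}

/* Mirrors range_overlaps(): do [off, off + len) and the entry intersect? */
static int ranges_overlap(const struct ordered_range *r, uint64_t off,
                          uint64_t len)
{
        return off + len > r->file_offset &&
               r->file_offset + r->num_bytes > off;
}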
fs/btrfs/ordered-data.h
@@ -67,14 +67,13 @@ struct btrfs_ordered_extent {
         /* logical offset in the file */
         u64 file_offset;
 
-        /* disk byte number */
-        u64 start;
-
-        /* ram length of the extent in bytes */
-        u64 len;
-
-        /* extent length on disk */
-        u64 disk_len;
+        /*
+         * These fields directly correspond to the same fields in
+         * btrfs_file_extent_item.
+         */
+        u64 disk_bytenr;
+        u64 num_bytes;
+        u64 disk_num_bytes;
 
         /* number of bytes that still need writing */
         u64 bytes_left;
@@ -161,12 +160,15 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
                            u64 *file_offset, u64 io_size,
                            int uptodate);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                             u64 start, u64 len, u64 disk_len, int type);
+                             u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
+                             int type);
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
-                                 u64 start, u64 len, u64 disk_len, int type);
+                                 u64 disk_bytenr, u64 num_bytes,
+                                 u64 disk_num_bytes, int type);
 int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
-                                      u64 start, u64 len, u64 disk_len,
-                                      int type, int compress_type);
+                                      u64 disk_bytenr, u64 num_bytes,
+                                      u64 disk_num_bytes, int type,
+                                      int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
                            struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
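
After this change an ordered extent carries the same triple as an on-disk btrfs_file_extent_item: disk_bytenr (where the extent starts on disk), disk_num_bytes (its length on disk), and num_bytes (the length of the file range it covers; for compressed extents this is larger than disk_num_bytes). A hedged sketch of quantities a reader of the struct might derive, again with stand-in types rather than the kernel API:

#include <stdint.h>

struct extent_fields {                  /* illustrative mirror of the triple */
        uint64_t disk_bytenr;           /* start of the extent on disk */
        uint64_t num_bytes;             /* file bytes covered (uncompressed) */
        uint64_t disk_num_bytes;        /* bytes occupied on disk */
};

/* One past the last disk byte; range-trimming code compares against this. */
static uint64_t disk_end(const struct extent_fields *fe)
{
        return fe->disk_bytenr + fe->disk_num_bytes;
}

/* Compressed extents occupy fewer disk bytes than file bytes covered. */
static int is_compressed(const struct extent_fields *fe)
{
        return fe->disk_num_bytes < fe->num_bytes;
}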
fs/btrfs/print-tree.c
@@ -317,7 +317,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
                         print_uuid_item(l, btrfs_item_ptr_offset(l, i),
                                         btrfs_item_size_nr(l, i));
                         break;
-                };
+                }
         }
 }
 
fs/btrfs/qgroup.c
@@ -1243,7 +1243,6 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
                               u64 dst)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *parent;
         struct btrfs_qgroup *member;
         struct btrfs_qgroup_list *list;
@@ -1259,9 +1258,8 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
                 return -ENOMEM;
 
         mutex_lock(&fs_info->qgroup_ioctl_lock);
-        quota_root = fs_info->quota_root;
-        if (!quota_root) {
-                ret = -EINVAL;
+        if (!fs_info->quota_root) {
+                ret = -ENOTCONN;
                 goto out;
         }
         member = find_qgroup_rb(fs_info, src);
@@ -1307,7 +1305,6 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
                               u64 dst)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *parent;
         struct btrfs_qgroup *member;
         struct btrfs_qgroup_list *list;
@@ -1320,9 +1317,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
         if (!tmp)
                 return -ENOMEM;
 
-        quota_root = fs_info->quota_root;
-        if (!quota_root) {
-                ret = -EINVAL;
+        if (!fs_info->quota_root) {
+                ret = -ENOTCONN;
                 goto out;
         }
 
@@ -1387,11 +1383,11 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
         int ret = 0;
 
         mutex_lock(&fs_info->qgroup_ioctl_lock);
-        quota_root = fs_info->quota_root;
-        if (!quota_root) {
-                ret = -EINVAL;
+        if (!fs_info->quota_root) {
+                ret = -ENOTCONN;
                 goto out;
         }
+        quota_root = fs_info->quota_root;
         qgroup = find_qgroup_rb(fs_info, qgroupid);
         if (qgroup) {
                 ret = -EEXIST;
@@ -1416,15 +1412,13 @@ out:
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
         struct btrfs_qgroup_list *list;
         int ret = 0;
 
         mutex_lock(&fs_info->qgroup_ioctl_lock);
-        quota_root = fs_info->quota_root;
-        if (!quota_root) {
-                ret = -EINVAL;
+        if (!fs_info->quota_root) {
+                ret = -ENOTCONN;
                 goto out;
         }
 
@@ -1465,7 +1459,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
                        struct btrfs_qgroup_limit *limit)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
         int ret = 0;
         /* Sometimes we would want to clear the limit on this qgroup.
@@ -1475,9 +1468,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
         const u64 CLEAR_VALUE = -1;
 
         mutex_lock(&fs_info->qgroup_ioctl_lock);
-        quota_root = fs_info->quota_root;
-        if (!quota_root) {
-                ret = -EINVAL;
+        if (!fs_info->quota_root) {
+                ret = -ENOTCONN;
                 goto out;
         }
 
@@ -2582,10 +2574,9 @@ cleanup:
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
 {
         struct btrfs_fs_info *fs_info = trans->fs_info;
-        struct btrfs_root *quota_root = fs_info->quota_root;
         int ret = 0;
 
-        if (!quota_root)
+        if (!fs_info->quota_root)
                 return ret;
 
         spin_lock(&fs_info->qgroup_lock);
@@ -2879,7 +2870,6 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
 static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
                           enum btrfs_qgroup_rsv_type type)
 {
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
         struct btrfs_fs_info *fs_info = root->fs_info;
         u64 ref_root = root->root_key.objectid;
@@ -2898,8 +2888,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
                 enforce = false;
 
         spin_lock(&fs_info->qgroup_lock);
-        quota_root = fs_info->quota_root;
-        if (!quota_root)
+        if (!fs_info->quota_root)
                 goto out;
 
         qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -2966,7 +2955,6 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
                                u64 ref_root, u64 num_bytes,
                                enum btrfs_qgroup_rsv_type type)
 {
-        struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
         struct ulist_node *unode;
         struct ulist_iterator uiter;
@@ -2984,8 +2972,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
         }
         spin_lock(&fs_info->qgroup_lock);
 
-        quota_root = fs_info->quota_root;
-        if (!quota_root)
+        if (!fs_info->quota_root)
                 goto out;
 
         qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -3685,7 +3672,6 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
 static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
                                 int num_bytes)
 {
-        struct btrfs_root *quota_root = fs_info->quota_root;
         struct btrfs_qgroup *qgroup;
         struct ulist_node *unode;
         struct ulist_iterator uiter;
@@ -3693,7 +3679,7 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
 
         if (num_bytes == 0)
                 return;
-        if (!quota_root)
+        if (!fs_info->quota_root)
                 return;
 
         spin_lock(&fs_info->qgroup_lock);
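
Returning ENOTCONN instead of EINVAL lets callers tell "quotas are not enabled" apart from a genuinely malformed request. A hedged userspace sketch of the distinction using the existing qgroup-create ioctl; the file descriptor is assumed to be an open file on a btrfs mount, and the error handling is illustrative:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Create qgroup 0/256 on an already-open btrfs file descriptor. */
static int create_qgroup(int fd)
{
        struct btrfs_ioctl_qgroup_create_args args;

        memset(&args, 0, sizeof(args));
        args.create = 1;
        args.qgroupid = 256;    /* level 0, subvolume id 256 */

        if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
                if (errno == ENOTCONN)  /* quotas not enabled on this fs */
                        fprintf(stderr, "enable quotas first\n");
                return -errno;
        }
        return 0;
}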
fs/btrfs/relocation.c
@@ -4332,6 +4332,15 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
                     block_group->start, buf);
 }
 
+static const char *stage_to_string(int stage)
+{
+        if (stage == MOVE_DATA_EXTENTS)
+                return "move data extents";
+        if (stage == UPDATE_DATA_PTRS)
+                return "update data pointers";
+        return "unknown";
+}
+
 /*
  * function to relocate all extents in a block group.
  */
@@ -4406,12 +4415,15 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                                  rc->block_group->length);
 
         while (1) {
+                int finishes_stage;
+
                 mutex_lock(&fs_info->cleaner_mutex);
                 ret = relocate_block_group(rc);
                 mutex_unlock(&fs_info->cleaner_mutex);
                 if (ret < 0)
                         err = ret;
 
+                finishes_stage = rc->stage;
                 /*
                  * We may have gotten ENOSPC after we already dirtied some
                  * extents. If writeout happens while we're relocating a
@@ -4437,8 +4449,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
                 if (rc->extents_found == 0)
                         break;
 
-                btrfs_info(fs_info, "found %llu extents", rc->extents_found);
+                btrfs_info(fs_info, "found %llu extents, stage: %s",
+                           rc->extents_found, stage_to_string(finishes_stage));
         }
 
         WARN_ON(rc->block_group->pinned > 0);
@@ -4656,7 +4668,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
         LIST_HEAD(list);
 
         ordered = btrfs_lookup_ordered_extent(inode, file_pos);
-        BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
+        BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
 
         disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
         ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
@@ -4680,7 +4692,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
                  * disk_len vs real len like with real inodes since it's all
                  * disk length.
                  */
-                new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
+                new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
                 sums->bytenr = new_bytenr;
 
                 btrfs_add_ordered_sum(ordered, sums);
fs/btrfs/scrub.c
@@ -8,6 +8,7 @@
 #include <linux/sched/mm.h>
 #include <crypto/hash.h>
 #include "ctree.h"
+#include "discard.h"
 #include "volumes.h"
 #include "disk-io.h"
 #include "ordered-data.h"
@@ -3682,7 +3683,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                 if (!cache->removed && !cache->ro && cache->reserved == 0 &&
                     cache->used == 0) {
                         spin_unlock(&cache->lock);
-                        btrfs_mark_bg_unused(cache);
+                        if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
+                                btrfs_discard_queue_work(&fs_info->discard_ctl,
+                                                         cache);
+                        else
+                                btrfs_mark_bg_unused(cache);
                 } else {
                         spin_unlock(&cache->lock);
                 }
fs/btrfs/space-info.c
@@ -161,8 +161,7 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
 
 static int can_overcommit(struct btrfs_fs_info *fs_info,
                           struct btrfs_space_info *space_info, u64 bytes,
-                          enum btrfs_reserve_flush_enum flush,
-                          bool system_chunk)
+                          enum btrfs_reserve_flush_enum flush)
 {
         u64 profile;
         u64 avail;
@@ -173,7 +172,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
         if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
                 return 0;
 
-        if (system_chunk)
+        if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
                 profile = btrfs_system_alloc_profile(fs_info);
         else
                 profile = btrfs_metadata_alloc_profile(fs_info);
@@ -227,8 +226,7 @@ again:
 
                 /* Check and see if our ticket can be satisified now. */
                 if ((used + ticket->bytes <= space_info->total_bytes) ||
-                    can_overcommit(fs_info, space_info, ticket->bytes, flush,
-                                   false)) {
+                    can_overcommit(fs_info, space_info, ticket->bytes, flush)) {
                         btrfs_space_info_update_bytes_may_use(fs_info,
                                                               space_info,
                                                               ticket->bytes);
@@ -626,8 +624,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 
 static inline u64
 btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_space_info *space_info,
-                                 bool system_chunk)
+                                 struct btrfs_space_info *space_info)
 {
         struct reserve_ticket *ticket;
         u64 used;
@@ -643,13 +640,12 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 
         to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
         if (can_overcommit(fs_info, space_info, to_reclaim,
-                           BTRFS_RESERVE_FLUSH_ALL, system_chunk))
+                           BTRFS_RESERVE_FLUSH_ALL))
                 return 0;
 
         used = btrfs_space_info_used(space_info, true);
 
-        if (can_overcommit(fs_info, space_info, SZ_1M,
-                           BTRFS_RESERVE_FLUSH_ALL, system_chunk))
+        if (can_overcommit(fs_info, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
                 expected = div_factor_fine(space_info->total_bytes, 95);
         else
                 expected = div_factor_fine(space_info->total_bytes, 90);
@@ -665,7 +661,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 
 static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
                                         struct btrfs_space_info *space_info,
-                                        u64 used, bool system_chunk)
+                                        u64 used)
 {
         u64 thresh = div_factor_fine(space_info->total_bytes, 98);
 
@@ -673,8 +669,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
         if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
                 return 0;
 
-        if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
-                                              system_chunk))
+        if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info))
                 return 0;
 
         return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -765,8 +760,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
         space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 
         spin_lock(&space_info->lock);
-        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
-                                                      false);
+        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
         if (!to_reclaim) {
                 space_info->flush = 0;
                 spin_unlock(&space_info->lock);
@@ -785,8 +779,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                         return;
                 }
                 to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
-                                                              space_info,
-                                                              false);
+                                                              space_info);
                 if (last_tickets_id == space_info->tickets_id) {
                         flush_state++;
                 } else {
@@ -858,8 +851,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
         int flush_state;
 
         spin_lock(&space_info->lock);
-        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
-                                                      false);
+        to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
         if (!to_reclaim) {
                 spin_unlock(&space_info->lock);
                 return;
@@ -990,8 +982,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
 static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
                                     struct btrfs_space_info *space_info,
                                     u64 orig_bytes,
-                                    enum btrfs_reserve_flush_enum flush,
-                                    bool system_chunk)
+                                    enum btrfs_reserve_flush_enum flush)
 {
         struct reserve_ticket ticket;
         u64 used;
@@ -1013,8 +1004,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
          */
         if (!pending_tickets &&
             ((used + orig_bytes <= space_info->total_bytes) ||
-             can_overcommit(fs_info, space_info, orig_bytes, flush,
-                            system_chunk))) {
+             can_overcommit(fs_info, space_info, orig_bytes, flush))) {
                 btrfs_space_info_update_bytes_may_use(fs_info, space_info,
                                                       orig_bytes);
                 ret = 0;
@@ -1054,8 +1044,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
          * the async reclaim as we will panic.
          */
         if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
-            need_do_async_reclaim(fs_info, space_info,
-                                  used, system_chunk) &&
+            need_do_async_reclaim(fs_info, space_info, used) &&
             !work_busy(&fs_info->async_reclaim_work)) {
                 trace_btrfs_trigger_flush(fs_info, space_info->flags,
                                           orig_bytes, flush, "preempt");
@@ -1092,10 +1081,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
         struct btrfs_fs_info *fs_info = root->fs_info;
         struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
         int ret;
-        bool system_chunk = (root == fs_info->chunk_root);
 
         ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
-                                       orig_bytes, flush, system_chunk);
+                                       orig_bytes, flush);
         if (ret == -ENOSPC &&
             unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
                 if (block_rsv != global_rsv &&
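
The system_chunk flag could be dropped because the same fact is recoverable from the space_info being reserved against: reservations made for the chunk root target the SYSTEM space_info, so testing space_info->flags answers the question at the point of use. A reduced sketch of the substitution with stand-in types; only the flag value is taken from the on-disk block group flags:

#include <stdint.h>

#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)    /* matches the on-disk flag */

struct space_info {
        uint64_t flags;                 /* block group type of this space */
};

/*
 * Previously a bool system_chunk was threaded through every reservation
 * helper; the flags test below derives the same answer locally.
 */
static int is_system_reservation(const struct space_info *si)
{
        return (si->flags & BTRFS_BLOCK_GROUP_SYSTEM) != 0;
}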
fs/btrfs/super.c
@@ -46,6 +46,7 @@
 #include "sysfs.h"
 #include "tests/btrfs-tests.h"
 #include "block-group.h"
+#include "discard.h"
 
 #include "qgroup.h"
 #define CREATE_TRACE_POINTS
@@ -146,6 +147,8 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
         if (sb_rdonly(sb))
                 return;
 
+        btrfs_discard_stop(fs_info);
+
         /* btrfs handle error by forcing the filesystem readonly */
         sb->s_flags |= SB_RDONLY;
         btrfs_info(fs_info, "forced readonly");
@@ -313,6 +316,7 @@ enum {
         Opt_datasum, Opt_nodatasum,
         Opt_defrag, Opt_nodefrag,
         Opt_discard, Opt_nodiscard,
+        Opt_discard_mode,
         Opt_nologreplay,
         Opt_norecovery,
         Opt_ratio,
@@ -375,6 +379,7 @@ static const match_table_t tokens = {
         {Opt_defrag, "autodefrag"},
         {Opt_nodefrag, "noautodefrag"},
         {Opt_discard, "discard"},
+        {Opt_discard_mode, "discard=%s"},
         {Opt_nodiscard, "nodiscard"},
         {Opt_nologreplay, "nologreplay"},
         {Opt_norecovery, "norecovery"},
@@ -695,12 +700,26 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                                    info->metadata_ratio);
                         break;
                 case Opt_discard:
-                        btrfs_set_and_info(info, DISCARD,
-                                           "turning on discard");
+                case Opt_discard_mode:
+                        if (token == Opt_discard ||
+                            strcmp(args[0].from, "sync") == 0) {
+                                btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
+                                btrfs_set_and_info(info, DISCARD_SYNC,
+                                                   "turning on sync discard");
+                        } else if (strcmp(args[0].from, "async") == 0) {
+                                btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
+                                btrfs_set_and_info(info, DISCARD_ASYNC,
+                                                   "turning on async discard");
+                        } else {
+                                ret = -EINVAL;
+                                goto out;
+                        }
                         break;
                 case Opt_nodiscard:
-                        btrfs_clear_and_info(info, DISCARD,
+                        btrfs_clear_and_info(info, DISCARD_SYNC,
                                              "turning off discard");
+                        btrfs_clear_and_info(info, DISCARD_ASYNC,
+                                             "turning off async discard");
                         break;
                 case Opt_space_cache:
                 case Opt_space_cache_version:
@@ -1322,8 +1341,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                 seq_puts(seq, ",nologreplay");
         if (btrfs_test_opt(info, FLUSHONCOMMIT))
                 seq_puts(seq, ",flushoncommit");
-        if (btrfs_test_opt(info, DISCARD))
+        if (btrfs_test_opt(info, DISCARD_SYNC))
                 seq_puts(seq, ",discard");
+        if (btrfs_test_opt(info, DISCARD_ASYNC))
+                seq_puts(seq, ",discard=async");
         if (!(info->sb->s_flags & SB_POSIXACL))
                 seq_puts(seq, ",noacl");
         if (btrfs_test_opt(info, SPACE_CACHE))
@@ -1713,6 +1734,14 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
                 btrfs_cleanup_defrag_inodes(fs_info);
         }
 
+        /* If we toggled discard async */
+        if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
+            btrfs_test_opt(fs_info, DISCARD_ASYNC))
+                btrfs_discard_resume(fs_info);
+        else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
+                 !btrfs_test_opt(fs_info, DISCARD_ASYNC))
+                btrfs_discard_cleanup(fs_info);
+
         clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
 }
 
@@ -1760,6 +1789,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
          */
         cancel_work_sync(&fs_info->async_reclaim_work);
 
+        btrfs_discard_cleanup(fs_info);
+
         /* wait for the uuid_scan task to finish */
         down(&fs_info->uuid_tree_rescan_sem);
         /* avoid complains from lockdep et al. */
fs/btrfs/sysfs.c
@@ -12,6 +12,7 @@
 #include <crypto/hash.h>
 
 #include "ctree.h"
+#include "discard.h"
 #include "disk-io.h"
 #include "transaction.h"
 #include "sysfs.h"
@@ -338,12 +339,178 @@ static const struct attribute_group btrfs_static_feature_attr_group = {
 
 #ifdef CONFIG_BTRFS_DEBUG
 
+/*
+ * Discard statistics and tunables
+ */
+#define discard_to_fs_info(_kobj)       to_fs_info((_kobj)->parent->parent)
+
+static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
+                                            struct kobj_attribute *a,
+                                            char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%lld\n",
+                        atomic64_read(&fs_info->discard_ctl.discardable_bytes));
+}
+BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
+
+static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
+                                              struct kobj_attribute *a,
+                                              char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%d\n",
+                        atomic_read(&fs_info->discard_ctl.discardable_extents));
+}
+BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
+
+static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
+                                               struct kobj_attribute *a,
+                                               char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%lld\n",
+                        fs_info->discard_ctl.discard_bitmap_bytes);
+}
+BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
+
+static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
+                                              struct kobj_attribute *a,
+                                              char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%lld\n",
+                        atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
+}
+BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
+
+static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
+                                               struct kobj_attribute *a,
+                                               char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%lld\n",
+                        fs_info->discard_ctl.discard_extent_bytes);
+}
+BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
+
+static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
+                                             struct kobj_attribute *a,
+                                             char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%u\n",
+                        READ_ONCE(fs_info->discard_ctl.iops_limit));
+}
+
+static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
+                                              struct kobj_attribute *a,
+                                              const char *buf, size_t len)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+        struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
+        u32 iops_limit;
+        int ret;
+
+        ret = kstrtou32(buf, 10, &iops_limit);
+        if (ret)
+                return -EINVAL;
+
+        WRITE_ONCE(discard_ctl->iops_limit, iops_limit);
+
+        return len;
+}
+BTRFS_ATTR_RW(discard, iops_limit, btrfs_discard_iops_limit_show,
+              btrfs_discard_iops_limit_store);
+
+static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
+                                             struct kobj_attribute *a,
+                                             char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%u\n",
+                        READ_ONCE(fs_info->discard_ctl.kbps_limit));
+}
+
+static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
+                                              struct kobj_attribute *a,
+                                              const char *buf, size_t len)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+        struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
+        u32 kbps_limit;
+        int ret;
+
+        ret = kstrtou32(buf, 10, &kbps_limit);
+        if (ret)
+                return -EINVAL;
+
+        WRITE_ONCE(discard_ctl->kbps_limit, kbps_limit);
+
+        return len;
+}
+BTRFS_ATTR_RW(discard, kbps_limit, btrfs_discard_kbps_limit_show,
+              btrfs_discard_kbps_limit_store);
+
+static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
+                                                   struct kobj_attribute *a,
+                                                   char *buf)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+
+        return snprintf(buf, PAGE_SIZE, "%llu\n",
+                        READ_ONCE(fs_info->discard_ctl.max_discard_size));
+}
+
+static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
+                                                    struct kobj_attribute *a,
+                                                    const char *buf, size_t len)
+{
+        struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
+        struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
+        u64 max_discard_size;
+        int ret;
+
+        ret = kstrtou64(buf, 10, &max_discard_size);
+        if (ret)
+                return -EINVAL;
+
+        WRITE_ONCE(discard_ctl->max_discard_size, max_discard_size);
+
+        return len;
+}
+BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
+              btrfs_discard_max_discard_size_store);
+
+static const struct attribute *discard_debug_attrs[] = {
+        BTRFS_ATTR_PTR(discard, discardable_bytes),
+        BTRFS_ATTR_PTR(discard, discardable_extents),
+        BTRFS_ATTR_PTR(discard, discard_bitmap_bytes),
+        BTRFS_ATTR_PTR(discard, discard_bytes_saved),
+        BTRFS_ATTR_PTR(discard, discard_extent_bytes),
+        BTRFS_ATTR_PTR(discard, iops_limit),
+        BTRFS_ATTR_PTR(discard, kbps_limit),
+        BTRFS_ATTR_PTR(discard, max_discard_size),
+        NULL,
+};
+
 /*
  * Runtime debugging exported via sysfs
  *
  * /sys/fs/btrfs/debug - applies to module or all filesystems
  * /sys/fs/btrfs/UUID - applies only to the given filesystem
  */
+static const struct attribute *btrfs_debug_mount_attrs[] = {
+        NULL,
+};
+
 static struct attribute *btrfs_debug_feature_attrs[] = {
         NULL
 };
@@ -734,10 +901,10 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
 
 static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
 {
-        if (fs_devs->device_dir_kobj) {
-                kobject_del(fs_devs->device_dir_kobj);
-                kobject_put(fs_devs->device_dir_kobj);
-                fs_devs->device_dir_kobj = NULL;
+        if (fs_devs->devices_kobj) {
+                kobject_del(fs_devs->devices_kobj);
+                kobject_put(fs_devs->devices_kobj);
+                fs_devs->devices_kobj = NULL;
         }
 
         if (fs_devs->fsid_kobj.state_initialized) {
@@ -771,6 +938,19 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
                 kobject_del(fs_info->space_info_kobj);
                 kobject_put(fs_info->space_info_kobj);
         }
+#ifdef CONFIG_BTRFS_DEBUG
+        if (fs_info->discard_debug_kobj) {
+                sysfs_remove_files(fs_info->discard_debug_kobj,
+                                   discard_debug_attrs);
+                kobject_del(fs_info->discard_debug_kobj);
+                kobject_put(fs_info->discard_debug_kobj);
+        }
+        if (fs_info->debug_kobj) {
+                sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
+                kobject_del(fs_info->debug_kobj);
+                kobject_put(fs_info->debug_kobj);
+        }
+#endif
         addrm_unknown_feature_attrs(fs_info, false);
         sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
         sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
@@ -969,45 +1149,119 @@ int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
         struct hd_struct *disk;
         struct kobject *disk_kobj;
 
-        if (!fs_devices->device_dir_kobj)
+        if (!fs_devices->devices_kobj)
                 return -EINVAL;
 
-        if (one_device && one_device->bdev) {
-                disk = one_device->bdev->bd_part;
-                disk_kobj = &part_to_dev(disk)->kobj;
+        if (one_device) {
+                if (one_device->bdev) {
+                        disk = one_device->bdev->bd_part;
+                        disk_kobj = &part_to_dev(disk)->kobj;
+                        sysfs_remove_link(fs_devices->devices_kobj,
+                                          disk_kobj->name);
+                }
 
-                sysfs_remove_link(fs_devices->device_dir_kobj,
-                                  disk_kobj->name);
-        }
+                kobject_del(&one_device->devid_kobj);
+                kobject_put(&one_device->devid_kobj);
 
-        if (one_device)
+                wait_for_completion(&one_device->kobj_unregister);
+
                 return 0;
+        }
 
-        list_for_each_entry(one_device,
-                        &fs_devices->devices, dev_list) {
-                if (!one_device->bdev)
-                        continue;
-                disk = one_device->bdev->bd_part;
-                disk_kobj = &part_to_dev(disk)->kobj;
+        list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
 
-                sysfs_remove_link(fs_devices->device_dir_kobj,
-                                  disk_kobj->name);
+                if (one_device->bdev) {
+                        disk = one_device->bdev->bd_part;
+                        disk_kobj = &part_to_dev(disk)->kobj;
+                        sysfs_remove_link(fs_devices->devices_kobj,
+                                          disk_kobj->name);
+                }
+                kobject_del(&one_device->devid_kobj);
+                kobject_put(&one_device->devid_kobj);
+
+                wait_for_completion(&one_device->kobj_unregister);
         }
 
         return 0;
 }
 
-int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
+static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
+                                                 struct kobj_attribute *a,
+                                                 char *buf)
 {
-        if (!fs_devs->device_dir_kobj)
-                fs_devs->device_dir_kobj = kobject_create_and_add("devices",
-                                                &fs_devs->fsid_kobj);
-
-        if (!fs_devs->device_dir_kobj)
-                return -ENOMEM;
-
-        return 0;
+        int val;
+        struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+                                                   devid_kobj);
+
+        val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
+
+        return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
+BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);
+
+static ssize_t btrfs_sysfs_missing_show(struct kobject *kobj,
+                                        struct kobj_attribute *a, char *buf)
+{
+        int val;
+        struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+                                                   devid_kobj);
+
+        val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
+
+        return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+BTRFS_ATTR(devid, missing, btrfs_sysfs_missing_show);
+
+static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
+                                                 struct kobj_attribute *a,
+                                                 char *buf)
+{
+        int val;
+        struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+                                                   devid_kobj);
+
+        val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
+
+        return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
+
+static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
+                                            struct kobj_attribute *a, char *buf)
+{
+        int val;
+        struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+                                                   devid_kobj);
+
+        val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+
+        return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
+
+static struct attribute *devid_attrs[] = {
+        BTRFS_ATTR_PTR(devid, in_fs_metadata),
+        BTRFS_ATTR_PTR(devid, missing),
+        BTRFS_ATTR_PTR(devid, replace_target),
+        BTRFS_ATTR_PTR(devid, writeable),
+        NULL
+};
+ATTRIBUTE_GROUPS(devid);
+
+static void btrfs_release_devid_kobj(struct kobject *kobj)
+{
+        struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+                                                   devid_kobj);
+
+        memset(&device->devid_kobj, 0, sizeof(struct kobject));
+        complete(&device->kobj_unregister);
+}
+
+static struct kobj_type devid_ktype = {
+        .sysfs_ops      = &kobj_sysfs_ops,
+        .default_groups = devid_groups,
+        .release        = btrfs_release_devid_kobj,
+};
 
 int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
                                 struct btrfs_device *one_device)
@@ -1016,22 +1270,31 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
         struct btrfs_device *dev;
 
         list_for_each_entry(dev, &fs_devices->devices, dev_list) {
-                struct hd_struct *disk;
-                struct kobject *disk_kobj;
-
-                if (!dev->bdev)
-                        continue;
 
                 if (one_device && one_device != dev)
                         continue;
 
-                disk = dev->bdev->bd_part;
-                disk_kobj = &part_to_dev(disk)->kobj;
+                if (dev->bdev) {
+                        struct hd_struct *disk;
+                        struct kobject *disk_kobj;
 
-                error = sysfs_create_link(fs_devices->device_dir_kobj,
-                                          disk_kobj, disk_kobj->name);
-                if (error)
+                        disk = dev->bdev->bd_part;
+                        disk_kobj = &part_to_dev(disk)->kobj;
+
+                        error = sysfs_create_link(fs_devices->devices_kobj,
+                                                  disk_kobj, disk_kobj->name);
+                        if (error)
+                                break;
+                }
+
+                init_completion(&dev->kobj_unregister);
+                error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
+                                             fs_devices->devices_kobj, "%llu",
+                                             dev->devid);
+                if (error) {
+                        kobject_put(&dev->devid_kobj);
                         break;
+                }
         }
 
         return error;
@@ -1063,27 +1326,49 @@ void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices,
                         "sysfs: failed to create fsid for sprout");
 }
 
+void btrfs_sysfs_update_devid(struct btrfs_device *device)
+{
+        char tmp[24];
+
+        snprintf(tmp, sizeof(tmp), "%llu", device->devid);
+
+        if (kobject_rename(&device->devid_kobj, tmp))
+                btrfs_warn(device->fs_devices->fs_info,
+                           "sysfs: failed to update devid for %llu",
+                           device->devid);
+}
+
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
 
 /*
+ * Creates:
+ *              /sys/fs/btrfs/UUID
+ *
  * Can be called by the device discovery thread.
- * And parent can be specified for seed device
  */
-int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
-                                struct kobject *parent)
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
 {
         int error;
 
         init_completion(&fs_devs->kobj_unregister);
         fs_devs->fsid_kobj.kset = btrfs_kset;
-        error = kobject_init_and_add(&fs_devs->fsid_kobj,
-                                &btrfs_ktype, parent, "%pU", fs_devs->fsid);
+        error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL,
+                                     "%pU", fs_devs->fsid);
         if (error) {
                 kobject_put(&fs_devs->fsid_kobj);
                 return error;
         }
 
+        fs_devs->devices_kobj = kobject_create_and_add("devices",
+                                                       &fs_devs->fsid_kobj);
+        if (!fs_devs->devices_kobj) {
+                btrfs_err(fs_devs->fs_info,
+                          "failed to init sysfs device interface");
+                kobject_put(&fs_devs->fsid_kobj);
+                return -ENOMEM;
+        }
+
         return 0;
 }
 
@@ -1111,8 +1396,26 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
                 goto failure;
 
 #ifdef CONFIG_BTRFS_DEBUG
-        error = sysfs_create_group(fsid_kobj,
-                                   &btrfs_debug_feature_attr_group);
+        fs_info->debug_kobj = kobject_create_and_add("debug", fsid_kobj);
+        if (!fs_info->debug_kobj) {
+                error = -ENOMEM;
+                goto failure;
+        }
+
+        error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
+        if (error)
+                goto failure;
+
+        /* Discard directory */
+        fs_info->discard_debug_kobj = kobject_create_and_add("discard",
+                                                             fs_info->debug_kobj);
+        if (!fs_info->discard_debug_kobj) {
+                error = -ENOMEM;
+                goto failure;
+        }
+
+        error = sysfs_create_files(fs_info->discard_debug_kobj,
+                                   discard_debug_attrs);
         if (error)
                 goto failure;
 #endif
@@ -1209,6 +1512,9 @@ void __cold btrfs_exit_sysfs(void)
         sysfs_unmerge_group(&btrfs_kset->kobj,
                             &btrfs_static_feature_attr_group);
         sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
+#ifdef CONFIG_BTRFS_DEBUG
+        sysfs_remove_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
+#endif
         kset_unregister(btrfs_kset);
 }
 
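
The kobjects registered above should surface per-device state as /sys/fs/btrfs/<UUID>/devices/<devid>/{in_fs_metadata,missing,replace_target,writeable}, and the discard tunables under /sys/fs/btrfs/<UUID>/debug/discard/ when CONFIG_BTRFS_DEBUG is set. A small userspace sketch reading one state file; the UUID and devid in the path are placeholders to substitute:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Placeholder path: substitute the filesystem UUID and devid. */
        const char *path = "/sys/fs/btrfs/<UUID>/devices/1/missing";
        char buf[4] = "";
        int fd = open(path, O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (read(fd, buf, sizeof(buf) - 1) > 0)
                printf("missing: %c\n", buf[0]);        /* '0' or '1' */
        close(fd);
        return 0;
}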
fs/btrfs/sysfs.h
@@ -18,9 +18,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
                                 struct btrfs_device *one_device);
 int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
                                 struct btrfs_device *one_device);
-int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
-                                struct kobject *parent);
-int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
+int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices,
                                     const u8 *fsid);
@@ -36,5 +34,6 @@ void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache);
 int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
                                     struct btrfs_space_info *space_info);
 void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
+void btrfs_sysfs_update_devid(struct btrfs_device *device);
 
 #endif
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -86,6 +86,27 @@ static void btrfs_destroy_test_fs(void)
 	unregister_filesystem(&test_type);
 }
 
+struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
+{
+	struct btrfs_device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
+	INIT_LIST_HEAD(&dev->dev_list);
+	list_add(&dev->dev_list, &fs_info->fs_devices->devices);
+
+	return dev;
+}
+
+static void btrfs_free_dummy_device(struct btrfs_device *dev)
+{
+	extent_io_tree_release(&dev->alloc_state);
+	kfree(dev);
+}
+
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
@@ -132,12 +153,14 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
 	INIT_LIST_HEAD(&fs_info->dirty_qgroups);
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
+	INIT_LIST_HEAD(&fs_info->fs_devices->devices);
 	INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
 	INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
 	extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
 			    IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
 	extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
 			    IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
+	extent_map_tree_init(&fs_info->mapping_tree);
 	fs_info->pinned_extents = &fs_info->freed_extents[0];
 	set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
 
@@ -150,6 +173,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 {
 	struct radix_tree_iter iter;
 	void **slot;
+	struct btrfs_device *dev, *tmp;
 
 	if (!fs_info)
 		return;
@@ -180,6 +204,11 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 	}
 	spin_unlock(&fs_info->buffer_lock);
 
+	btrfs_mapping_tree_free(&fs_info->mapping_tree);
+	list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
+				 dev_list) {
+		btrfs_free_dummy_device(dev);
+	}
 	btrfs_free_qgroup_config(fs_info);
 	btrfs_free_fs_roots(fs_info);
 	cleanup_srcu_struct(&fs_info->subvol_srcu);
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -46,6 +46,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long lengt
 void btrfs_free_dummy_block_group(struct btrfs_block_group *cache);
 void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
 			    struct btrfs_fs_info *fs_info);
+struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info);
 #else
 static inline int btrfs_run_sanity_tests(void)
 {
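The new dummy-device helpers let self-tests stand up a btrfs_fs_devices list without any real block device: btrfs_alloc_dummy_device() links the device into fs_info->fs_devices->devices, and btrfs_free_dummy_fs_info() reclaims it through btrfs_free_dummy_device(). A condensed kernel-context sketch of the intended call pattern (illustration only; it mirrors the rmap self-test that follows):

	static int example_self_test(struct btrfs_fs_info *fs_info)
	{
		/* fs_info comes from btrfs_alloc_dummy_fs_info() */
		struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);

		if (IS_ERR(dev))
			return PTR_ERR(dev);
		/*
		 * No explicit free here: the device sits on
		 * fs_info->fs_devices->devices and is released later by
		 * btrfs_free_dummy_fs_info() via btrfs_free_dummy_device().
		 */
		return 0;
	}
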
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -6,6 +6,9 @@
 #include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../volumes.h"
+#include "../disk-io.h"
+#include "../block-group.h"
 
 static void free_extent_map_tree(struct extent_map_tree *em_tree)
 {
@@ -437,11 +440,153 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+struct rmap_test_vector {
+	u64 raid_type;
+	u64 physical_start;
+	u64 data_stripe_size;
+	u64 num_data_stripes;
+	u64 num_stripes;
+	/* Assume we won't have more than 5 physical stripes */
+	u64 data_stripe_phys_start[5];
+	bool expected_mapped_addr;
+	/* Physical to logical addresses */
+	u64 mapped_logical[5];
+};
+
+static int test_rmap_block(struct btrfs_fs_info *fs_info,
+			   struct rmap_test_vector *test)
+{
+	struct extent_map *em;
+	struct map_lookup *map = NULL;
+	u64 *logical = NULL;
+	int i, out_ndaddrs, out_stripe_len;
+	int ret;
+
+	em = alloc_extent_map();
+	if (!em) {
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		return -ENOMEM;
+	}
+
+	map = kmalloc(map_lookup_size(test->num_stripes), GFP_KERNEL);
+	if (!map) {
+		kfree(em);
+		test_std_err(TEST_ALLOC_EXTENT_MAP);
+		return -ENOMEM;
+	}
+
+	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
+	/* Start at 4GiB logical address */
+	em->start = SZ_4G;
+	em->len = test->data_stripe_size * test->num_data_stripes;
+	em->block_len = em->len;
+	em->orig_block_len = test->data_stripe_size;
+	em->map_lookup = map;
+
+	map->num_stripes = test->num_stripes;
+	map->stripe_len = BTRFS_STRIPE_LEN;
+	map->type = test->raid_type;
+
+	for (i = 0; i < map->num_stripes; i++) {
+		struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);
+
+		if (IS_ERR(dev)) {
+			test_err("cannot allocate device");
+			ret = PTR_ERR(dev);
+			goto out;
+		}
+		map->stripes[i].dev = dev;
+		map->stripes[i].physical = test->data_stripe_phys_start[i];
+	}
+
+	write_lock(&fs_info->mapping_tree.lock);
+	ret = add_extent_mapping(&fs_info->mapping_tree, em, 0);
+	write_unlock(&fs_info->mapping_tree.lock);
+	if (ret) {
+		test_err("error adding block group mapping to mapping tree");
+		goto out_free;
+	}
+
+	ret = btrfs_rmap_block(fs_info, em->start, btrfs_sb_offset(1),
+			       &logical, &out_ndaddrs, &out_stripe_len);
+	if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
+		test_err("didn't rmap anything but expected %d",
+			 test->expected_mapped_addr);
+		goto out;
+	}
+
+	if (out_stripe_len != BTRFS_STRIPE_LEN) {
+		test_err("calculated stripe length doesn't match");
+		goto out;
+	}
+
+	if (out_ndaddrs != test->expected_mapped_addr) {
+		for (i = 0; i < out_ndaddrs; i++)
+			test_msg("mapped %llu", logical[i]);
+		test_err("unexpected number of mapped addresses: %d", out_ndaddrs);
+		goto out;
+	}
+
+	for (i = 0; i < out_ndaddrs; i++) {
+		if (logical[i] != test->mapped_logical[i]) {
+			test_err("unexpected logical address mapped");
+			goto out;
+		}
+	}
+
+	ret = 0;
+out:
+	write_lock(&fs_info->mapping_tree.lock);
+	remove_extent_mapping(&fs_info->mapping_tree, em);
+	write_unlock(&fs_info->mapping_tree.lock);
+	/* For us */
+	free_extent_map(em);
+out_free:
+	/* For the tree */
+	free_extent_map(em);
+	kfree(logical);
+	return ret;
+}
+
 int btrfs_test_extent_map(void)
 {
 	struct btrfs_fs_info *fs_info = NULL;
 	struct extent_map_tree *em_tree;
-	int ret = 0;
+	int ret = 0, i;
+	struct rmap_test_vector rmap_tests[] = {
+		{
+			/*
+			 * Test that a chunk with 2 data stripes, one of which
+			 * intersects the physical address of the super block,
+			 * is correctly recognised.
+			 */
+			.raid_type = BTRFS_BLOCK_GROUP_RAID1,
+			.physical_start = SZ_64M - SZ_4M,
+			.data_stripe_size = SZ_256M,
+			.num_data_stripes = 2,
+			.num_stripes = 2,
+			.data_stripe_phys_start =
+				{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
+			.expected_mapped_addr = true,
+			.mapped_logical = {SZ_4G + SZ_4M}
+		},
+		{
+			/*
+			 * Test that out-of-range physical addresses are
+			 * ignored
+			 */
+
+			 /* SINGLE chunk type */
+			.raid_type = 0,
+			.physical_start = SZ_4G,
+			.data_stripe_size = SZ_256M,
+			.num_data_stripes = 1,
+			.num_stripes = 1,
+			.data_stripe_phys_start = {SZ_256M},
+			.expected_mapped_addr = false,
+			.mapped_logical = {0}
+		}
+	};
 
 	test_msg("running extent_map tests");
 
@@ -474,6 +619,13 @@ int btrfs_test_extent_map(void)
 		goto out;
 	ret = test_case_4(fs_info, em_tree);
 
+	test_msg("running rmap tests");
+	for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
+		ret = test_rmap_block(fs_info, &rmap_tests[i]);
+		if (ret)
+			goto out;
+	}
+
 out:
 	kfree(em_tree);
 	btrfs_free_dummy_fs_info(fs_info);
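The first test vector above encodes a worked example of the physical-to-logical (rmap) translation: the chunk's logical start is SZ_4G, stripe 0 starts at physical SZ_64M - SZ_4M, and btrfs_sb_offset(1), the primary super block, sits at 64MiB, so the super block lands SZ_4M into stripe 0 and must map back to SZ_4G + SZ_4M; stripe 1 (at 60MiB + 256MiB) does not cover it. A standalone userspace sketch of that arithmetic, with the RAID1 mirroring simplified away (not kernel code; values copied from the vector):

	#include <stdio.h>
	#include <stdint.h>

	#define SZ_1M	(1024ULL * 1024ULL)
	#define SZ_4G	(4096ULL * SZ_1M)

	int main(void)
	{
		uint64_t chunk_logical = SZ_4G;		/* em->start */
		uint64_t stripe_size = 256 * SZ_1M;	/* data_stripe_size */
		uint64_t phys_start[2] = { 60 * SZ_1M, 316 * SZ_1M };
		uint64_t sb_physical = 64 * SZ_1M;	/* btrfs_sb_offset(1) */

		for (int i = 0; i < 2; i++) {
			if (sb_physical < phys_start[i] ||
			    sb_physical >= phys_start[i] + stripe_size) {
				printf("stripe %d: super block not inside\n", i);
				continue;
			}
			/* RAID1 stripes mirror the same logical range */
			uint64_t logical = chunk_logical +
					   (sb_physical - phys_start[i]);
			printf("stripe %d: logical = 4GiB + %lluMiB\n", i,
			       (unsigned long long)((logical - chunk_logical) / SZ_1M));
		}
		return 0;
	}

Running it prints "stripe 0: logical = 4GiB + 4MiB", matching .mapped_logical = {SZ_4G + SZ_4M} in the vector.
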
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -263,7 +263,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 
 	/* First with no extents */
 	BTRFS_I(inode)->root = root;
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize);
 	if (IS_ERR(em)) {
 		em = NULL;
 		test_err("got an error when we shouldn't have");
@@ -283,7 +283,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	 */
 	setup_file_extents(root, sectorsize);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -305,7 +305,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -333,7 +333,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -356,7 +356,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* Regular extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -384,7 +384,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* The next 3 are split extents */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -413,7 +413,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -435,7 +435,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -469,7 +469,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* Prealloc extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -498,7 +498,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* The next 3 are a half written prealloc extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -528,7 +528,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -561,7 +561,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -596,7 +596,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* Now for the compressed extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -630,7 +630,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* Split compressed extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -665,7 +665,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -692,7 +692,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -727,8 +727,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	free_extent_map(em);
 
 	/* A hole between regular extents but no hole extent */
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
-			      sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -755,7 +754,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -788,7 +787,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
 	offset = em->start + em->len;
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -872,7 +871,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
 	insert_inode_item_key(root);
 	insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
 		      sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
@@ -894,8 +893,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
 	}
 	free_extent_map(em);
 
-	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
-			      2 * sectorsize, 0);
+	em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, 2 * sectorsize);
 	if (IS_ERR(em)) {
 		test_err("got an error when we shouldn't have");
 		goto out;
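All of the inode-tests hunks above are the same mechanical change: btrfs_get_extent() lost its trailing argument. Judging from the call sites, the removed parameter appears to be the old create flag, leaving a prototype along these lines (a sketch inferred from the callers, not copied from the header):

	struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
					    struct page *page, size_t pg_offset,
					    u64 start, u64 len);
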
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -147,13 +147,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
 	}
 }
 
-static noinline void switch_commit_roots(struct btrfs_transaction *trans)
+static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
 {
+	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_root *root, *tmp;
 
 	down_write(&fs_info->commit_root_sem);
-	list_for_each_entry_safe(root, tmp, &trans->switch_commits,
+	list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
 				 dirty_list) {
 		list_del_init(&root->dirty_list);
 		free_extent_buffer(root->commit_root);
@@ -165,16 +166,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
 	}
 
 	/* We can free old roots now. */
-	spin_lock(&trans->dropped_roots_lock);
-	while (!list_empty(&trans->dropped_roots)) {
-		root = list_first_entry(&trans->dropped_roots,
+	spin_lock(&cur_trans->dropped_roots_lock);
+	while (!list_empty(&cur_trans->dropped_roots)) {
+		root = list_first_entry(&cur_trans->dropped_roots,
 					struct btrfs_root, root_list);
 		list_del_init(&root->root_list);
-		spin_unlock(&trans->dropped_roots_lock);
+		spin_unlock(&cur_trans->dropped_roots_lock);
+		btrfs_free_log(trans, root);
 		btrfs_drop_and_free_fs_root(fs_info, root);
-		spin_lock(&trans->dropped_roots_lock);
+		spin_lock(&cur_trans->dropped_roots_lock);
 	}
-	spin_unlock(&trans->dropped_roots_lock);
+	spin_unlock(&cur_trans->dropped_roots_lock);
 	up_write(&fs_info->commit_root_sem);
 }
 
@@ -1421,7 +1423,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 	ret = commit_cowonly_roots(trans);
 	if (ret)
 		goto out;
-	switch_commit_roots(trans->transaction);
+	switch_commit_roots(trans);
 	ret = btrfs_write_and_wait_transaction(trans);
 	if (ret)
 		btrfs_handle_fs_error(fs_info, ret,
@@ -2013,6 +2015,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	ASSERT(refcount_read(&trans->use_count) == 1);
 
+	/*
+	 * Some places just start a transaction to commit it. We need to make
+	 * sure that if this commit fails that the abort code actually marks the
+	 * transaction as failed, so set trans->dirty to make the abort code do
+	 * the right thing.
+	 */
+	trans->dirty = true;
+
 	/* Stop the commit early if ->aborted is set */
 	if (unlikely(READ_ONCE(cur_trans->aborted))) {
 		ret = cur_trans->aborted;
@@ -2301,7 +2311,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	list_add_tail(&fs_info->chunk_root->dirty_list,
 		      &cur_trans->switch_commits);
 
-	switch_commit_roots(cur_trans);
+	switch_commit_roots(trans);
 
 	ASSERT(list_empty(&cur_trans->dirty_bgs));
 	ASSERT(list_empty(&cur_trans->io_bgs));
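Two of the hunks above interlock: switch_commit_roots() now takes the transaction handle because freeing the log root of a dropped root (btrfs_free_log()) needs it, and btrfs_commit_transaction() sets trans->dirty up front so that a commit started only to be committed still poisons the transaction if it fails. A condensed sketch of the abort-helper shape this relies on (an assumption drawn from fs/btrfs/super.c, not verbatim):

	void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, int errno_)
	{
		trans->aborted = errno_;
		/*
		 * A handle that never dirtied the transaction only rolls
		 * itself back; without the early trans->dirty = true above,
		 * a failed commit-only caller would leave the transaction
		 * unmarked.
		 */
		if (!trans->dirty && list_empty(&trans->new_bgs))
			return;
		WRITE_ONCE(trans->transaction->aborted, errno_);
	}
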
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -373,6 +373,104 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
 	return 0;
 }
 
+/* Inode item error output has the same format as dir_item_err() */
+#define inode_item_err(eb, slot, fmt, ...)			\
+	dir_item_err(eb, slot, fmt, __VA_ARGS__)
+
+static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
+			   int slot)
+{
+	struct btrfs_key item_key;
+	bool is_inode_item;
+
+	btrfs_item_key_to_cpu(leaf, &item_key, slot);
+	is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);
+
+	/* For XATTR_ITEM, location key should be all 0 */
+	if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
+		if (key->type != 0 || key->objectid != 0 || key->offset != 0)
+			return -EUCLEAN;
+		return 0;
+	}
+
+	if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
+	     key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
+	    key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
+	    key->objectid != BTRFS_FREE_INO_OBJECTID) {
+		if (is_inode_item) {
+			generic_err(leaf, slot,
+	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
+				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
+				BTRFS_FIRST_FREE_OBJECTID,
+				BTRFS_LAST_FREE_OBJECTID,
+				BTRFS_FREE_INO_OBJECTID);
+		} else {
+			dir_item_err(leaf, slot,
+"invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
+				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
+				BTRFS_FIRST_FREE_OBJECTID,
+				BTRFS_LAST_FREE_OBJECTID,
+				BTRFS_FREE_INO_OBJECTID);
+		}
+		return -EUCLEAN;
+	}
+	if (key->offset != 0) {
+		if (is_inode_item)
+			inode_item_err(leaf, slot,
+				       "invalid key offset: has %llu expect 0",
+				       key->offset);
+		else
+			dir_item_err(leaf, slot,
+				"invalid location key offset:has %llu expect 0",
+				key->offset);
+		return -EUCLEAN;
+	}
+	return 0;
+}
+
+static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
+			  int slot)
+{
+	struct btrfs_key item_key;
+	bool is_root_item;
+
+	btrfs_item_key_to_cpu(leaf, &item_key, slot);
+	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
+
+	/* No such tree id */
+	if (key->objectid == 0) {
+		if (is_root_item)
+			generic_err(leaf, slot, "invalid root id 0");
+		else
+			dir_item_err(leaf, slot,
+				     "invalid location key root id 0");
+		return -EUCLEAN;
+	}
+
+	/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
+	if (!is_fstree(key->objectid) && !is_root_item) {
+		dir_item_err(leaf, slot,
+		"invalid location key objectid, have %llu expect [%llu, %llu]",
+			     key->objectid, BTRFS_FIRST_FREE_OBJECTID,
+			     BTRFS_LAST_FREE_OBJECTID);
+		return -EUCLEAN;
+	}
+
+	/*
+	 * ROOT_ITEM with non-zero offset means this is a snapshot, created at
+	 * @offset transid.
+	 * Furthermore, for location key in DIR_ITEM, its offset is always -1.
+	 *
+	 * So here we only check offset for reloc tree whose key->offset must
+	 * be a valid tree.
+	 */
+	if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
+		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
+		return -EUCLEAN;
+	}
+	return 0;
+}
+
 static int check_dir_item(struct extent_buffer *leaf,
 			  struct btrfs_key *key, struct btrfs_key *prev_key,
 			  int slot)
@@ -386,12 +484,14 @@ static int check_dir_item(struct extent_buffer *leaf,
 		return -EUCLEAN;
 	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 	while (cur < item_size) {
+		struct btrfs_key location_key;
 		u32 name_len;
 		u32 data_len;
 		u32 max_name_len;
 		u32 total_size;
 		u32 name_hash;
 		u8 dir_type;
+		int ret;
 
 		/* header itself should not cross item boundary */
 		if (cur + sizeof(*di) > item_size) {
@@ -401,6 +501,25 @@ static int check_dir_item(struct extent_buffer *leaf,
 			return -EUCLEAN;
 		}
 
+		/* Location key check */
+		btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
+		if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
+			ret = check_root_key(leaf, &location_key, slot);
+			if (ret < 0)
+				return ret;
+		} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
+			   location_key.type == 0) {
+			ret = check_inode_key(leaf, &location_key, slot);
+			if (ret < 0)
+				return ret;
+		} else {
+			dir_item_err(leaf, slot,
+			"invalid location key type, have %u, expect %u or %u",
+				     location_key.type, BTRFS_ROOT_ITEM_KEY,
+				     BTRFS_INODE_ITEM_KEY);
+			return -EUCLEAN;
+		}
+
 		/* dir type check */
 		dir_type = btrfs_dir_type(leaf, di);
 		if (dir_type >= BTRFS_FT_MAX) {
@@ -738,6 +857,44 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
 	return 0;
 }
 
+/*
+ * Enhanced version of chunk item checker.
+ *
+ * The common btrfs_check_chunk_valid() doesn't check item size since it needs
+ * to work on super block sys_chunk_array which doesn't have full item ptr.
+ */
+static int check_leaf_chunk_item(struct extent_buffer *leaf,
+				 struct btrfs_chunk *chunk,
+				 struct btrfs_key *key, int slot)
+{
+	int num_stripes;
+
+	if (btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
+		chunk_err(leaf, chunk, key->offset,
+			"invalid chunk item size: have %u expect [%zu, %u)",
+			btrfs_item_size_nr(leaf, slot),
+			sizeof(struct btrfs_chunk),
+			BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
+		return -EUCLEAN;
+	}
+
+	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+	/* Let btrfs_check_chunk_valid() handle this error type */
+	if (num_stripes == 0)
+		goto out;
+
+	if (btrfs_chunk_item_size(num_stripes) !=
+	    btrfs_item_size_nr(leaf, slot)) {
+		chunk_err(leaf, chunk, key->offset,
+			"invalid chunk item size: have %u expect %lu",
+			btrfs_item_size_nr(leaf, slot),
+			btrfs_chunk_item_size(num_stripes));
+		return -EUCLEAN;
+	}
+out:
+	return btrfs_check_chunk_valid(leaf, chunk, key->offset);
+}
+
 __printf(3, 4)
 __cold
 static void dev_item_err(const struct extent_buffer *eb, int slot,
@@ -801,7 +958,7 @@ static int check_dev_item(struct extent_buffer *leaf,
 }
 
 /* Inode item error output has the same format as dir_item_err() */
-#define inode_item_err(fs_info, eb, slot, fmt, ...)		\
+#define inode_item_err(eb, slot, fmt, ...)			\
 	dir_item_err(eb, slot, fmt, __VA_ARGS__)
 
 static int check_inode_item(struct extent_buffer *leaf,
@@ -812,30 +969,17 @@ static int check_inode_item(struct extent_buffer *leaf,
 	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
 	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
 	u32 mode;
+	int ret;
+
+	ret = check_inode_key(leaf, key, slot);
+	if (ret < 0)
+		return ret;
 
-	if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
-	     key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
-	    key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
-	    key->objectid != BTRFS_FREE_INO_OBJECTID) {
-		generic_err(leaf, slot,
-	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
-			    key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
-			    BTRFS_FIRST_FREE_OBJECTID,
-			    BTRFS_LAST_FREE_OBJECTID,
-			    BTRFS_FREE_INO_OBJECTID);
-		return -EUCLEAN;
-	}
-	if (key->offset != 0) {
-		inode_item_err(fs_info, leaf, slot,
-			       "invalid key offset: has %llu expect 0",
-			       key->offset);
-		return -EUCLEAN;
-	}
 	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
 
 	/* Here we use super block generation + 1 to handle log tree */
 	if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
-		inode_item_err(fs_info, leaf, slot,
+		inode_item_err(leaf, slot,
 			"invalid inode generation: has %llu expect (0, %llu]",
 			       btrfs_inode_generation(leaf, iitem),
 			       super_gen + 1);
@@ -843,7 +987,7 @@ static int check_inode_item(struct extent_buffer *leaf,
 	}
 	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
 	if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
-		inode_item_err(fs_info, leaf, slot,
+		inode_item_err(leaf, slot,
 			"invalid inode generation: has %llu expect [0, %llu]",
 			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
 		return -EUCLEAN;
@@ -856,7 +1000,7 @@ static int check_inode_item(struct extent_buffer *leaf,
 	 */
 	mode = btrfs_inode_mode(leaf, iitem);
 	if (mode & ~valid_mask) {
-		inode_item_err(fs_info, leaf, slot,
+		inode_item_err(leaf, slot,
 			       "unknown mode bit detected: 0x%x",
 			       mode & ~valid_mask);
 		return -EUCLEAN;
@@ -869,20 +1013,20 @@ static int check_inode_item(struct extent_buffer *leaf,
 	 */
 	if (!has_single_bit_set(mode & S_IFMT)) {
 		if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
-			inode_item_err(fs_info, leaf, slot,
+			inode_item_err(leaf, slot,
 			"invalid mode: has 0%o expect valid S_IF* bit(s)",
 				       mode & S_IFMT);
 			return -EUCLEAN;
 		}
 	}
 	if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
-		inode_item_err(fs_info, leaf, slot,
+		inode_item_err(leaf, slot,
 		       "invalid nlink: has %u expect no more than 1 for dir",
 			btrfs_inode_nlink(leaf, iitem));
 		return -EUCLEAN;
 	}
 	if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
-		inode_item_err(fs_info, leaf, slot,
+		inode_item_err(leaf, slot,
 			       "unknown flags detected: 0x%llx",
 			       btrfs_inode_flags(leaf, iitem) &
 			       ~BTRFS_INODE_FLAG_MASK);
@@ -898,22 +1042,11 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
 	struct btrfs_root_item ri;
 	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
 				     BTRFS_ROOT_SUBVOL_DEAD;
+	int ret;
 
-	/* No such tree id */
-	if (key->objectid == 0) {
-		generic_err(leaf, slot, "invalid root id 0");
-		return -EUCLEAN;
-	}
-
-	/*
-	 * Some older kernel may create ROOT_ITEM with non-zero offset, so here
-	 * we only check offset for reloc tree whose key->offset must be a
-	 * valid tree.
-	 */
-	if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
-		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
-		return -EUCLEAN;
-	}
-
+	ret = check_root_key(leaf, key, slot);
+	if (ret < 0)
+		return ret;
+
 	if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
 		generic_err(leaf, slot,
@@ -1302,8 +1435,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
 	return 0;
 }
 
-#define inode_ref_err(fs_info, eb, slot, fmt, args...)		\
-	inode_item_err(fs_info, eb, slot, fmt, ##args)
+#define inode_ref_err(eb, slot, fmt, args...)			\
+	inode_item_err(eb, slot, fmt, ##args)
 static int check_inode_ref(struct extent_buffer *leaf,
 			   struct btrfs_key *key, struct btrfs_key *prev_key,
 			   int slot)
@@ -1316,7 +1449,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
 		return -EUCLEAN;
 	/* namelen can't be 0, so item_size == sizeof() is also invalid */
 	if (btrfs_item_size_nr(leaf, slot) <= sizeof(*iref)) {
-		inode_ref_err(fs_info, leaf, slot,
+		inode_ref_err(leaf, slot,
 			"invalid item size, have %u expect (%zu, %u)",
 			btrfs_item_size_nr(leaf, slot),
 			sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
@@ -1329,7 +1462,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
 		u16 namelen;
 
 		if (ptr + sizeof(iref) > end) {
-			inode_ref_err(fs_info, leaf, slot,
+			inode_ref_err(leaf, slot,
 			"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
 				ptr, end, sizeof(iref));
 			return -EUCLEAN;
@@ -1338,7 +1471,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
 		iref = (struct btrfs_inode_ref *)ptr;
 		namelen = btrfs_inode_ref_name_len(leaf, iref);
 		if (ptr + sizeof(*iref) + namelen > end) {
-			inode_ref_err(fs_info, leaf, slot,
+			inode_ref_err(leaf, slot,
 				"inode ref overflow, ptr %lu end %lu namelen %u",
 				ptr, end, namelen);
 			return -EUCLEAN;
@@ -1384,7 +1517,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
 		break;
 	case BTRFS_CHUNK_ITEM_KEY:
 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
-		ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
+		ret = check_leaf_chunk_item(leaf, chunk, key, slot);
 		break;
 	case BTRFS_DEV_ITEM_KEY:
 		ret = check_dev_item(leaf, key, slot);
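check_inode_key() above admits an inode location objectid only when it falls in the normal file-object window [BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID] or equals one of two special objectids. A standalone userspace illustration of that window; the constant values mirror fs/btrfs/ctree.h and should be treated as assumptions as far as this excerpt goes:

	#include <stdio.h>
	#include <stdint.h>
	#include <stdbool.h>

	#define BTRFS_ROOT_TREE_DIR_OBJECTID	6ULL
	#define BTRFS_FREE_INO_OBJECTID		(-12ULL)
	#define BTRFS_FIRST_FREE_OBJECTID	256ULL
	#define BTRFS_LAST_FREE_OBJECTID	(-256ULL)

	static bool inode_objectid_ok(uint64_t objectid)
	{
		if (objectid >= BTRFS_FIRST_FREE_OBJECTID &&
		    objectid <= BTRFS_LAST_FREE_OBJECTID)
			return true;
		/* the two special objectids outside the normal window */
		return objectid == BTRFS_ROOT_TREE_DIR_OBJECTID ||
		       objectid == BTRFS_FREE_INO_OBJECTID;
	}

	int main(void)
	{
		uint64_t samples[] = { 0, 6, 255, 256, 1ULL << 40,
				       -256ULL, -12ULL };
		for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("objectid %llu: %s\n",
			       (unsigned long long)samples[i],
			       inode_objectid_ok(samples[i]) ? "valid" : "-EUCLEAN");
		return 0;
	}

Objectid 0 and 255 fail (as the checker rejects them), while 6, 256, large file objectids, -256 and -12 pass.
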
@ -2674,14 +2674,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
u32 blocksize;
|
u32 blocksize;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
WARN_ON(*level < 0);
|
|
||||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
|
||||||
|
|
||||||
while (*level > 0) {
|
while (*level > 0) {
|
||||||
struct btrfs_key first_key;
|
struct btrfs_key first_key;
|
||||||
|
|
||||||
WARN_ON(*level < 0);
|
|
||||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
|
||||||
cur = path->nodes[*level];
|
cur = path->nodes[*level];
|
||||||
|
|
||||||
WARN_ON(btrfs_header_level(cur) != *level);
|
WARN_ON(btrfs_header_level(cur) != *level);
|
||||||
@ -2732,9 +2727,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
|
|
||||||
WARN_ON(root_owner !=
|
WARN_ON(root_owner !=
|
||||||
BTRFS_TREE_LOG_OBJECTID);
|
BTRFS_TREE_LOG_OBJECTID);
|
||||||
ret = btrfs_free_and_pin_reserved_extent(
|
ret = btrfs_pin_reserved_extent(fs_info,
|
||||||
fs_info, bytenr,
|
bytenr, blocksize);
|
||||||
blocksize);
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
free_extent_buffer(next);
|
free_extent_buffer(next);
|
||||||
return ret;
|
return ret;
|
||||||
@ -2749,7 +2743,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_ON(*level <= 0);
|
|
||||||
if (path->nodes[*level-1])
|
if (path->nodes[*level-1])
|
||||||
free_extent_buffer(path->nodes[*level-1]);
|
free_extent_buffer(path->nodes[*level-1]);
|
||||||
path->nodes[*level-1] = next;
|
path->nodes[*level-1] = next;
|
||||||
@ -2757,9 +2750,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
path->slots[*level] = 0;
|
path->slots[*level] = 0;
|
||||||
cond_resched();
|
cond_resched();
|
||||||
}
|
}
|
||||||
WARN_ON(*level < 0);
|
|
||||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
|
||||||
|
|
||||||
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
|
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
@ -2815,8 +2805,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
}
|
}
|
||||||
|
|
||||||
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
|
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
|
||||||
ret = btrfs_free_and_pin_reserved_extent(
|
ret = btrfs_pin_reserved_extent(fs_info,
|
||||||
fs_info,
|
|
||||||
path->nodes[*level]->start,
|
path->nodes[*level]->start,
|
||||||
path->nodes[*level]->len);
|
path->nodes[*level]->len);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -2896,10 +2885,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
|
|||||||
clear_extent_buffer_dirty(next);
|
clear_extent_buffer_dirty(next);
|
||||||
}
|
}
|
||||||
|
|
||||||
WARN_ON(log->root_key.objectid !=
|
ret = btrfs_pin_reserved_extent(fs_info, next->start,
|
||||||
BTRFS_TREE_LOG_OBJECTID);
|
next->len);
|
||||||
ret = btrfs_free_and_pin_reserved_extent(fs_info,
|
|
||||||
next->start, next->len);
|
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -3935,7 +3922,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
|
|||||||
static noinline int copy_items(struct btrfs_trans_handle *trans,
|
static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_inode *inode,
|
struct btrfs_inode *inode,
|
||||||
struct btrfs_path *dst_path,
|
struct btrfs_path *dst_path,
|
||||||
struct btrfs_path *src_path, u64 *last_extent,
|
struct btrfs_path *src_path,
|
||||||
int start_slot, int nr, int inode_only,
|
int start_slot, int nr, int inode_only,
|
||||||
u64 logged_isize)
|
u64 logged_isize)
|
||||||
{
|
{
|
||||||
@ -3946,7 +3933,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
struct btrfs_file_extent_item *extent;
|
struct btrfs_file_extent_item *extent;
|
||||||
struct btrfs_inode_item *inode_item;
|
struct btrfs_inode_item *inode_item;
|
||||||
struct extent_buffer *src = src_path->nodes[0];
|
struct extent_buffer *src = src_path->nodes[0];
|
||||||
struct btrfs_key first_key, last_key, key;
|
|
||||||
int ret;
|
int ret;
|
||||||
struct btrfs_key *ins_keys;
|
struct btrfs_key *ins_keys;
|
||||||
u32 *ins_sizes;
|
u32 *ins_sizes;
|
||||||
@ -3954,9 +3940,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
int i;
|
int i;
|
||||||
struct list_head ordered_sums;
|
struct list_head ordered_sums;
|
||||||
int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
|
int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
|
||||||
bool has_extents = false;
|
|
||||||
bool need_find_last_extent = true;
|
|
||||||
bool done = false;
|
|
||||||
|
|
||||||
INIT_LIST_HEAD(&ordered_sums);
|
INIT_LIST_HEAD(&ordered_sums);
|
||||||
|
|
||||||
@ -3965,8 +3948,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
if (!ins_data)
|
if (!ins_data)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
first_key.objectid = (u64)-1;
|
|
||||||
|
|
||||||
ins_sizes = (u32 *)ins_data;
|
ins_sizes = (u32 *)ins_data;
|
||||||
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
|
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
|
||||||
|
|
||||||
@ -3987,9 +3968,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
|
|
||||||
src_offset = btrfs_item_ptr_offset(src, start_slot + i);
|
src_offset = btrfs_item_ptr_offset(src, start_slot + i);
|
||||||
|
|
||||||
if (i == nr - 1)
|
|
||||||
last_key = ins_keys[i];
|
|
||||||
|
|
||||||
if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
|
if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
|
||||||
inode_item = btrfs_item_ptr(dst_path->nodes[0],
|
inode_item = btrfs_item_ptr(dst_path->nodes[0],
|
||||||
dst_path->slots[0],
|
dst_path->slots[0],
|
||||||
@ -4003,20 +3981,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
src_offset, ins_sizes[i]);
|
src_offset, ins_sizes[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* We set need_find_last_extent here in case we know we were
|
|
||||||
* processing other items and then walk into the first extent in
|
|
||||||
* the inode. If we don't hit an extent then nothing changes,
|
|
||||||
* we'll do the last search the next time around.
|
|
||||||
*/
|
|
||||||
if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
|
|
||||||
has_extents = true;
|
|
||||||
if (first_key.objectid == (u64)-1)
|
|
||||||
first_key = ins_keys[i];
|
|
||||||
} else {
|
|
||||||
need_find_last_extent = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* take a reference on file data extents so that truncates
|
/* take a reference on file data extents so that truncates
|
||||||
* or deletes of this inode don't have to relog the inode
|
* or deletes of this inode don't have to relog the inode
|
||||||
* again
|
* again
|
||||||
@ -4082,167 +4046,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
|||||||
kfree(sums);
|
kfree(sums);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!has_extents)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
if (need_find_last_extent && *last_extent == first_key.offset) {
|
|
||||||
/*
|
|
||||||
* We don't have any leafs between our current one and the one
|
|
||||||
* we processed before that can have file extent items for our
|
|
||||||
* inode (and have a generation number smaller than our current
|
|
||||||
* transaction id).
|
|
||||||
*/
|
|
||||||
need_find_last_extent = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Because we use btrfs_search_forward we could skip leaves that were
|
|
||||||
* not modified and then assume *last_extent is valid when it really
|
|
||||||
* isn't. So back up to the previous leaf and read the end of the last
|
|
||||||
* extent before we go and fill in holes.
|
|
||||||
*/
|
|
||||||
if (need_find_last_extent) {
|
|
||||||
u64 len;
|
|
||||||
|
|
||||||
ret = btrfs_prev_leaf(inode->root, src_path);
|
|
||||||
if (ret < 0)
|
|
fs/btrfs/tree-log.c

-			return ret;
-		if (ret)
-			goto fill_holes;
-		if (src_path->slots[0])
-			src_path->slots[0]--;
-		src = src_path->nodes[0];
-		btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
-		if (key.objectid != btrfs_ino(inode) ||
-		    key.type != BTRFS_EXTENT_DATA_KEY)
-			goto fill_holes;
-		extent = btrfs_item_ptr(src, src_path->slots[0],
-					struct btrfs_file_extent_item);
-		if (btrfs_file_extent_type(src, extent) ==
-		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_ram_bytes(src, extent);
-			*last_extent = ALIGN(key.offset + len,
-					     fs_info->sectorsize);
-		} else {
-			len = btrfs_file_extent_num_bytes(src, extent);
-			*last_extent = key.offset + len;
-		}
-	}
-fill_holes:
-	/* So we did prev_leaf, now we need to move to the next leaf, but a few
-	 * things could have happened
-	 *
-	 * 1) A merge could have happened, so we could currently be on a leaf
-	 * that holds what we were copying in the first place.
-	 * 2) A split could have happened, and now not all of the items we want
-	 * are on the same leaf.
-	 *
-	 * So we need to adjust how we search for holes, we need to drop the
-	 * path and re-search for the first extent key we found, and then walk
-	 * forward until we hit the last one we copied.
-	 */
-	if (need_find_last_extent) {
-		/* btrfs_prev_leaf could return 1 without releasing the path */
-		btrfs_release_path(src_path);
-		ret = btrfs_search_slot(NULL, inode->root, &first_key,
-					src_path, 0, 0);
-		if (ret < 0)
-			return ret;
-		ASSERT(ret == 0);
-		src = src_path->nodes[0];
-		i = src_path->slots[0];
-	} else {
-		i = start_slot;
-	}
-
-	/*
-	 * Ok so here we need to go through and fill in any holes we may have
-	 * to make sure that holes are punched for those areas in case they had
-	 * extents previously.
-	 */
-	while (!done) {
-		u64 offset, len;
-		u64 extent_end;
-
-		if (i >= btrfs_header_nritems(src_path->nodes[0])) {
-			ret = btrfs_next_leaf(inode->root, src_path);
-			if (ret < 0)
-				return ret;
-			ASSERT(ret == 0);
-			src = src_path->nodes[0];
-			i = 0;
-			need_find_last_extent = true;
-		}
-
-		btrfs_item_key_to_cpu(src, &key, i);
-		if (!btrfs_comp_cpu_keys(&key, &last_key))
-			done = true;
-		if (key.objectid != btrfs_ino(inode) ||
-		    key.type != BTRFS_EXTENT_DATA_KEY) {
-			i++;
-			continue;
-		}
-		extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
-		if (btrfs_file_extent_type(src, extent) ==
-		    BTRFS_FILE_EXTENT_INLINE) {
-			len = btrfs_file_extent_ram_bytes(src, extent);
-			extent_end = ALIGN(key.offset + len,
-					   fs_info->sectorsize);
-		} else {
-			len = btrfs_file_extent_num_bytes(src, extent);
-			extent_end = key.offset + len;
-		}
-		i++;
-
-		if (*last_extent == key.offset) {
-			*last_extent = extent_end;
-			continue;
-		}
-		offset = *last_extent;
-		len = key.offset - *last_extent;
-		ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
-					       offset, 0, 0, len, 0, len,
-					       0, 0, 0);
-		if (ret)
-			break;
-		*last_extent = extent_end;
-	}
-
-	/*
-	 * Check if there is a hole between the last extent found in our leaf
-	 * and the first extent in the next leaf. If there is one, we need to
-	 * log an explicit hole so that at replay time we can punch the hole.
-	 */
-	if (ret == 0 &&
-	    key.objectid == btrfs_ino(inode) &&
-	    key.type == BTRFS_EXTENT_DATA_KEY &&
-	    i == btrfs_header_nritems(src_path->nodes[0])) {
-		ret = btrfs_next_leaf(inode->root, src_path);
-		need_find_last_extent = true;
-		if (ret > 0) {
-			ret = 0;
-		} else if (ret == 0) {
-			btrfs_item_key_to_cpu(src_path->nodes[0], &key,
-					      src_path->slots[0]);
-			if (key.objectid == btrfs_ino(inode) &&
-			    key.type == BTRFS_EXTENT_DATA_KEY &&
-			    *last_extent < key.offset) {
-				const u64 len = key.offset - *last_extent;
-
-				ret = btrfs_insert_file_extent(trans, log,
-							       btrfs_ino(inode),
-							       *last_extent, 0,
-							       0, len, 0, len,
-							       0, 0, 0);
-				*last_extent += len;
-			}
-		}
-	}
-	/*
-	 * Need to let the callers know we dropped the path so they should
-	 * re-search.
-	 */
-	if (!ret && need_find_last_extent)
-		ret = 1;
 	return ret;
 }
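The loop removed above tracked where the previous extent ended: a regular extent ends at key.offset + num_bytes, while an inline extent's end is rounded up to the sector size before the next hole is computed. A minimal userspace sketch of that rounding; ALIGN() is redefined locally to mirror the kernel macro and all sizes are invented:

#include <stdint.h>
#include <stdio.h>

/* Local stand-in for the kernel's ALIGN(); power-of-two alignment only. */
#define ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
	uint64_t sectorsize = 4096;
	uint64_t key_offset = 0;	/* inline extents start at offset 0 */
	uint64_t ram_bytes = 3000;	/* invented inline payload size */
	uint64_t num_bytes = 8192;	/* invented regular extent length */

	/* An inline extent covers a whole sector in the logical file view. */
	printf("inline extent end:  %llu\n",
	       (unsigned long long)ALIGN(key_offset + ram_bytes, sectorsize));

	/* A regular extent ends exactly at offset + num_bytes. */
	printf("regular extent end: %llu\n",
	       (unsigned long long)(key_offset + num_bytes));
	return 0;
}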
@@ -4407,7 +4210,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 	const u64 i_size = i_size_read(&inode->vfs_inode);
 	const u64 ino = btrfs_ino(inode);
 	struct btrfs_path *dst_path = NULL;
-	u64 last_extent = (u64)-1;
+	bool dropped_extents = false;
 	int ins_nr = 0;
 	int start_slot;
 	int ret;
@@ -4429,8 +4232,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 		if (slot >= btrfs_header_nritems(leaf)) {
 			if (ins_nr > 0) {
 				ret = copy_items(trans, inode, dst_path, path,
-						 &last_extent, start_slot,
-						 ins_nr, 1, 0);
+						 start_slot, ins_nr, 1, 0);
 				if (ret < 0)
 					goto out;
 				ins_nr = 0;
@@ -4454,8 +4256,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (last_extent == (u64)-1) {
-			last_extent = key.offset;
+		if (!dropped_extents) {
 			/*
 			 * Avoid logging extent items logged in past fsync calls
 			 * and leading to duplicate keys in the log tree.
@@ -4469,6 +4270,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			} while (ret == -EAGAIN);
 			if (ret)
 				goto out;
+			dropped_extents = true;
 		}
 		if (ins_nr == 0)
 			start_slot = slot;
@@ -4483,7 +4285,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 		}
 	}
 	if (ins_nr > 0) {
-		ret = copy_items(trans, inode, dst_path, path, &last_extent,
+		ret = copy_items(trans, inode, dst_path, path,
 				 start_slot, ins_nr, 1, 0);
 		if (ret > 0)
 			ret = 0;
@@ -4670,13 +4472,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 
 		if (slot >= nritems) {
 			if (ins_nr > 0) {
-				u64 last_extent = 0;
-
 				ret = copy_items(trans, inode, dst_path, path,
-						 &last_extent, start_slot,
-						 ins_nr, 1, 0);
-				/* can't be 1, extent items aren't processed */
-				ASSERT(ret <= 0);
+						 start_slot, ins_nr, 1, 0);
 				if (ret < 0)
 					return ret;
 				ins_nr = 0;
@@ -4700,13 +4497,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 		cond_resched();
 	}
 	if (ins_nr > 0) {
-		u64 last_extent = 0;
-
 		ret = copy_items(trans, inode, dst_path, path,
-				 &last_extent, start_slot,
-				 ins_nr, 1, 0);
-		/* can't be 1, extent items aren't processed */
-		ASSERT(ret <= 0);
+				 start_slot, ins_nr, 1, 0);
 		if (ret < 0)
 			return ret;
 	}
@@ -4715,100 +4507,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
 }
 
 /*
- * If the no holes feature is enabled we need to make sure any hole between the
- * last extent and the i_size of our inode is explicitly marked in the log. This
- * is to make sure that doing something like:
- *
- * 1) create file with 128Kb of data
- * 2) truncate file to 64Kb
- * 3) truncate file to 256Kb
- * 4) fsync file
- * 5) <crash/power failure>
- * 6) mount fs and trigger log replay
- *
- * Will give us a file with a size of 256Kb, the first 64Kb of data match what
- * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
- * file correspond to a hole. The presence of explicit holes in a log tree is
- * what guarantees that log replay will remove/adjust file extent items in the
- * fs/subvol tree.
- *
- * Here we do not need to care about holes between extents, that is already done
- * by copy_items(). We also only need to do this in the full sync path, where we
- * lookup for extents from the fs/subvol tree only. In the fast path case, we
- * lookup the list of modified extent maps and if any represents a hole, we
- * insert a corresponding extent representing a hole in the log tree.
+ * When using the NO_HOLES feature if we punched a hole that causes the
+ * deletion of entire leafs or all the extent items of the first leaf (the one
+ * that contains the inode item and references) we may end up not processing
+ * any extents, because there are no leafs with a generation matching the
+ * current transaction that have extent items for our inode. So we need to find
+ * if any holes exist and then log them. We also need to log holes after any
+ * truncate operation that changes the inode's size.
  */
-static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
-				   struct btrfs_root *root,
-				   struct btrfs_inode *inode,
-				   struct btrfs_path *path)
+static int btrfs_log_holes(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root,
+			   struct btrfs_inode *inode,
+			   struct btrfs_path *path)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	int ret;
 	struct btrfs_key key;
-	u64 hole_start;
-	u64 hole_size;
-	struct extent_buffer *leaf;
-	struct btrfs_root *log = root->log_root;
 	const u64 ino = btrfs_ino(inode);
 	const u64 i_size = i_size_read(&inode->vfs_inode);
+	u64 prev_extent_end = 0;
+	int ret;
 
-	if (!btrfs_fs_incompat(fs_info, NO_HOLES))
+	if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
 		return 0;
 
 	key.objectid = ino;
 	key.type = BTRFS_EXTENT_DATA_KEY;
-	key.offset = (u64)-1;
+	key.offset = 0;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-	ASSERT(ret != 0);
 	if (ret < 0)
 		return ret;
 
-	ASSERT(path->slots[0] > 0);
-	path->slots[0]--;
-	leaf = path->nodes[0];
-	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-
-	if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
-		/* inode does not have any extents */
-		hole_start = 0;
-		hole_size = i_size;
-	} else {
+	while (true) {
 		struct btrfs_file_extent_item *extent;
+		struct extent_buffer *leaf = path->nodes[0];
 		u64 len;
 
-		/*
-		 * If there's an extent beyond i_size, an explicit hole was
-		 * already inserted by copy_items().
-		 */
-		if (key.offset >= i_size)
-			return 0;
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			if (ret > 0) {
+				ret = 0;
+				break;
+			}
+			leaf = path->nodes[0];
+		}
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
+			break;
+
+		/* We have a hole, log it. */
+		if (prev_extent_end < key.offset) {
+			const u64 hole_len = key.offset - prev_extent_end;
+
+			/*
+			 * Release the path to avoid deadlocks with other code
+			 * paths that search the root while holding locks on
+			 * leafs from the log root.
+			 */
+			btrfs_release_path(path);
+			ret = btrfs_insert_file_extent(trans, root->log_root,
+						       ino, prev_extent_end, 0,
+						       0, hole_len, 0, hole_len,
+						       0, 0, 0);
+			if (ret < 0)
+				return ret;
+
+			/*
+			 * Search for the same key again in the root. Since it's
+			 * an extent item and we are holding the inode lock, the
+			 * key must still exist. If it doesn't just emit warning
+			 * and return an error to fall back to a transaction
+			 * commit.
+			 */
+			ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+			if (ret < 0)
+				return ret;
+			if (WARN_ON(ret > 0))
+				return -ENOENT;
+			leaf = path->nodes[0];
+		}
 
 		extent = btrfs_item_ptr(leaf, path->slots[0],
 					struct btrfs_file_extent_item);
-
 		if (btrfs_file_extent_type(leaf, extent) ==
-		    BTRFS_FILE_EXTENT_INLINE)
-			return 0;
+		    BTRFS_FILE_EXTENT_INLINE) {
+			len = btrfs_file_extent_ram_bytes(leaf, extent);
+			prev_extent_end = ALIGN(key.offset + len,
+						fs_info->sectorsize);
+		} else {
+			len = btrfs_file_extent_num_bytes(leaf, extent);
+			prev_extent_end = key.offset + len;
+		}
 
-		len = btrfs_file_extent_num_bytes(leaf, extent);
-		/* Last extent goes beyond i_size, no need to log a hole. */
-		if (key.offset + len > i_size)
-			return 0;
-		hole_start = key.offset + len;
-		hole_size = i_size - hole_start;
+		path->slots[0]++;
+		cond_resched();
 	}
-	btrfs_release_path(path);
 
-	/* Last extent ends at i_size. */
-	if (hole_size == 0)
-		return 0;
+	if (prev_extent_end < i_size) {
+		u64 hole_len;
 
-	hole_size = ALIGN(hole_size, fs_info->sectorsize);
-	ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
-				       hole_size, 0, hole_size, 0, 0, 0);
-	return ret;
+		btrfs_release_path(path);
+		hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
+		ret = btrfs_insert_file_extent(trans, root->log_root,
+					       ino, prev_extent_end, 0, 0,
+					       hole_len, 0, hole_len,
+					       0, 0, 0);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
 }
 
 /*
@@ -5011,6 +4822,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
 			}
 			continue;
 		}
+		/*
+		 * If the inode was already logged skip it - otherwise we can
+		 * hit an infinite loop. Example:
+		 *
+		 * From the commit root (previous transaction) we have the
+		 * following inodes:
+		 *
+		 * inode 257 a directory
+		 * inode 258 with references "zz" and "zz_link" on inode 257
+		 * inode 259 with reference "a" on inode 257
+		 *
+		 * And in the current (uncommitted) transaction we have:
+		 *
+		 * inode 257 a directory, unchanged
+		 * inode 258 with references "a" and "a2" on inode 257
+		 * inode 259 with reference "zz_link" on inode 257
+		 * inode 261 with reference "zz" on inode 257
+		 *
+		 * When logging inode 261 the following infinite loop could
+		 * happen if we don't skip already logged inodes:
+		 *
+		 * - we detect inode 258 as a conflicting inode, with inode 261
+		 *   on reference "zz", and log it;
+		 *
+		 * - we detect inode 259 as a conflicting inode, with inode 258
+		 *   on reference "a", and log it;
+		 *
+		 * - we detect inode 258 as a conflicting inode, with inode 259
+		 *   on reference "zz_link", and log it - again! After this we
+		 *   repeat the above steps forever.
+		 */
+		spin_lock(&BTRFS_I(inode)->lock);
+		/*
+		 * Check the inode's logged_trans only instead of
+		 * btrfs_inode_in_log(). This is because the last_log_commit of
+		 * the inode is not updated when we only log that it exists and
+		 * it has the full sync bit set (see btrfs_log_inode()).
+		 */
+		if (BTRFS_I(inode)->logged_trans == trans->transid) {
+			spin_unlock(&BTRFS_I(inode)->lock);
+			btrfs_add_delayed_iput(inode);
+			continue;
+		}
+		spin_unlock(&BTRFS_I(inode)->lock);
 		/*
 		 * We are safe logging the other inode without acquiring its
 		 * lock as long as we log with the LOG_INODE_EXISTS mode. We
@@ -5110,7 +4965,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key min_key;
 	struct btrfs_key max_key;
 	struct btrfs_root *log = root->log_root;
-	u64 last_extent = 0;
 	int err = 0;
 	int ret;
 	int nritems;
@@ -5288,7 +5142,7 @@ again:
 				ins_start_slot = path->slots[0];
 			}
 			ret = copy_items(trans, inode, dst_path, path,
-					 &last_extent, ins_start_slot,
+					 ins_start_slot,
 					 ins_nr, inode_only,
 					 logged_isize);
 			if (ret < 0) {
@@ -5311,17 +5165,13 @@ again:
 			if (ins_nr == 0)
 				goto next_slot;
 			ret = copy_items(trans, inode, dst_path, path,
-					 &last_extent, ins_start_slot,
+					 ins_start_slot,
 					 ins_nr, inode_only, logged_isize);
 			if (ret < 0) {
 				err = ret;
 				goto out_unlock;
 			}
 			ins_nr = 0;
-			if (ret) {
-				btrfs_release_path(path);
-				continue;
-			}
 			goto next_slot;
 		}
 
@@ -5334,18 +5184,13 @@ again:
 			goto next_slot;
 		}
 
-		ret = copy_items(trans, inode, dst_path, path, &last_extent,
+		ret = copy_items(trans, inode, dst_path, path,
 				 ins_start_slot, ins_nr, inode_only,
 				 logged_isize);
 		if (ret < 0) {
 			err = ret;
 			goto out_unlock;
 		}
-		if (ret) {
-			ins_nr = 0;
-			btrfs_release_path(path);
-			continue;
-		}
 		ins_nr = 1;
 		ins_start_slot = path->slots[0];
 next_slot:
@@ -5359,13 +5204,12 @@ next_slot:
 		}
 		if (ins_nr) {
 			ret = copy_items(trans, inode, dst_path, path,
-					 &last_extent, ins_start_slot,
+					 ins_start_slot,
 					 ins_nr, inode_only, logged_isize);
 			if (ret < 0) {
 				err = ret;
 				goto out_unlock;
 			}
-			ret = 0;
 			ins_nr = 0;
 		}
 		btrfs_release_path(path);
@@ -5380,14 +5224,13 @@ next_key:
 		}
 	}
 	if (ins_nr) {
-		ret = copy_items(trans, inode, dst_path, path, &last_extent,
+		ret = copy_items(trans, inode, dst_path, path,
 				 ins_start_slot, ins_nr, inode_only,
 				 logged_isize);
 		if (ret < 0) {
 			err = ret;
 			goto out_unlock;
 		}
-		ret = 0;
 		ins_nr = 0;
 	}
 
@@ -5400,7 +5243,7 @@ next_key:
 	if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
 		btrfs_release_path(path);
 		btrfs_release_path(dst_path);
-		err = btrfs_log_trailing_hole(trans, root, inode, path);
+		err = btrfs_log_holes(trans, root, inode, path);
 		if (err)
 			goto out_unlock;
 	}
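The new btrfs_log_holes() above reduces to a simple scan: walk the file's sorted extent items, and any gap between the previous extent's end and the next item's offset, or between the last extent's end and i_size, is a hole that must be logged. A userspace model of that scan over a plain array; the extent list and sizes are invented:

#include <stdint.h>
#include <stdio.h>

struct extent { uint64_t offset; uint64_t num_bytes; };

/* Print every hole in [0, i_size) given extents sorted by offset. */
static void log_holes(const struct extent *ext, int nr, uint64_t i_size)
{
	uint64_t prev_extent_end = 0;

	for (int i = 0; i < nr; i++) {
		if (prev_extent_end < ext[i].offset)	/* gap => hole */
			printf("hole [%llu, %llu)\n",
			       (unsigned long long)prev_extent_end,
			       (unsigned long long)ext[i].offset);
		prev_extent_end = ext[i].offset + ext[i].num_bytes;
	}
	if (prev_extent_end < i_size)			/* trailing hole */
		printf("hole [%llu, %llu)\n",
		       (unsigned long long)prev_extent_end,
		       (unsigned long long)i_size);
}

int main(void)
{
	const struct extent ext[] = { { 0, 4096 }, { 16384, 8192 } };

	/* Prints [4096, 16384) and [24576, 65536). */
	log_holes(ext, 2, 65536);
	return 0;
}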
fs/btrfs/volumes.c

@@ -30,6 +30,7 @@
 #include "tree-checker.h"
 #include "space-info.h"
 #include "block-group.h"
+#include "discard.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 	[BTRFS_RAID_RAID10] = {
@@ -66,6 +67,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 		.tolerated_failures = 2,
 		.devs_increment	= 3,
 		.ncopies	= 3,
+		.nparity	= 0,
 		.raid_name	= "raid1c3",
 		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C3,
 		.mindev_error	= BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
@@ -78,6 +80,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 		.tolerated_failures = 3,
 		.devs_increment	= 4,
 		.ncopies	= 4,
+		.nparity	= 0,
 		.raid_name	= "raid1c4",
 		.bg_flag	= BTRFS_BLOCK_GROUP_RAID1C4,
 		.mindev_error	= BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
@@ -438,39 +441,6 @@ static noinline struct btrfs_fs_devices *find_fsid(
 
 	ASSERT(fsid);
 
-	if (metadata_fsid) {
-		/*
-		 * Handle scanned device having completed its fsid change but
-		 * belonging to a fs_devices that was created by first scanning
-		 * a device which didn't have its fsid/metadata_uuid changed
-		 * at all and the CHANGING_FSID_V2 flag set.
-		 */
-		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
-			if (fs_devices->fsid_change &&
-			    memcmp(metadata_fsid, fs_devices->fsid,
-				   BTRFS_FSID_SIZE) == 0 &&
-			    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
-				   BTRFS_FSID_SIZE) == 0) {
-				return fs_devices;
-			}
-		}
-		/*
-		 * Handle scanned device having completed its fsid change but
-		 * belonging to a fs_devices that was created by a device that
-		 * has an outdated pair of fsid/metadata_uuid and
-		 * CHANGING_FSID_V2 flag set.
-		 */
-		list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
-			if (fs_devices->fsid_change &&
-			    memcmp(fs_devices->metadata_uuid,
-				   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
-			    memcmp(metadata_fsid, fs_devices->metadata_uuid,
-				   BTRFS_FSID_SIZE) == 0) {
-				return fs_devices;
-			}
-		}
-	}
-
 	/* Handle non-split brain cases */
 	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
 		if (metadata_fsid) {
@@ -486,6 +456,47 @@ static noinline struct btrfs_fs_devices *find_fsid(
 	return NULL;
 }
 
+static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
+				struct btrfs_super_block *disk_super)
+{
+	struct btrfs_fs_devices *fs_devices;
+
+	/*
+	 * Handle scanned device having completed its fsid change but
+	 * belonging to a fs_devices that was created by first scanning
+	 * a device which didn't have its fsid/metadata_uuid changed
+	 * at all and the CHANGING_FSID_V2 flag set.
+	 */
+	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		if (fs_devices->fsid_change &&
+		    memcmp(disk_super->metadata_uuid, fs_devices->fsid,
+			   BTRFS_FSID_SIZE) == 0 &&
+		    memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+			   BTRFS_FSID_SIZE) == 0) {
+			return fs_devices;
+		}
+	}
+	/*
+	 * Handle scanned device having completed its fsid change but
+	 * belonging to a fs_devices that was created by a device that
+	 * has an outdated pair of fsid/metadata_uuid and
+	 * CHANGING_FSID_V2 flag set.
+	 */
+	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		if (fs_devices->fsid_change &&
+		    memcmp(fs_devices->metadata_uuid,
+			   fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
+		    memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
+			   BTRFS_FSID_SIZE) == 0) {
+			return fs_devices;
+		}
+	}
+
+	return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
+}
+
 static int
 btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
 		      int flush, struct block_device **bdev,
@@ -669,7 +680,9 @@ error_brelse:
 
 /*
  * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
- * being created with a disk that has already completed its fsid change.
+ * being created with a disk that has already completed its fsid change. Such
+ * disk can belong to an fs which has its FSID changed or to one which doesn't.
+ * Handle both cases here.
  */
 static struct btrfs_fs_devices *find_fsid_inprogress(
 					struct btrfs_super_block *disk_super)
@@ -685,7 +698,7 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
 		}
 	}
 
-	return NULL;
+	return find_fsid(disk_super->fsid, NULL);
 }
@@ -697,17 +710,54 @@ static struct btrfs_fs_devices *find_fsid_changed(
 	/*
 	 * Handles the case where scanned device is part of an fs that had
 	 * multiple successful changes of FSID but currently device didn't
-	 * observe it. Meaning our fsid will be different than theirs.
+	 * observe it. Meaning our fsid will be different than theirs. We need
+	 * to handle two subcases:
+	 *  1 - The fs still continues to have different METADATA/FSID uuids.
+	 *  2 - The fs is switched back to its original FSID (METADATA/FSID
+	 *  are equal).
 	 */
 	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		/* Changed UUIDs */
 		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
 			   BTRFS_FSID_SIZE) != 0 &&
 		    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
 			   BTRFS_FSID_SIZE) == 0 &&
 		    memcmp(fs_devices->fsid, disk_super->fsid,
-			   BTRFS_FSID_SIZE) != 0) {
+			   BTRFS_FSID_SIZE) != 0)
+			return fs_devices;
+
+		/* Unchanged UUIDs */
+		if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
+			   BTRFS_FSID_SIZE) == 0 &&
+		    memcmp(fs_devices->fsid, disk_super->metadata_uuid,
+			   BTRFS_FSID_SIZE) == 0)
 			return fs_devices;
-		}
 	}
 
 	return NULL;
 }
+
+static struct btrfs_fs_devices *find_fsid_reverted_metadata(
+				struct btrfs_super_block *disk_super)
+{
+	struct btrfs_fs_devices *fs_devices;
+
+	/*
+	 * Handle the case where the scanned device is part of an fs whose last
+	 * metadata UUID change reverted it to the original FSID. At the same
+	 * time fs_devices was first created by another constituent device
+	 * which didn't fully observe the operation. This results in a
+	 * btrfs_fs_devices created with metadata/fsid different AND
+	 * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
+	 * fs_devices equal to the FSID of the disk.
+	 */
+	list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+		if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+			   BTRFS_FSID_SIZE) != 0 &&
+		    memcmp(fs_devices->metadata_uuid, disk_super->fsid,
+			   BTRFS_FSID_SIZE) == 0 &&
+		    fs_devices->fsid_change)
+			return fs_devices;
+	}
+
+	return NULL;
+}
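The matching rules above are easiest to read as plain predicates over the two UUIDs carried by a filesystem and the two carried by a scanned super block. A userspace model with invented struct and helper names; only the memcmp logic mirrors the kernel code:

#include <stdbool.h>
#include <string.h>

#define FSID_SIZE 16

struct fs_devs {
	unsigned char fsid[FSID_SIZE];
	unsigned char metadata_uuid[FSID_SIZE];
	bool fsid_change;
};
struct scanned_sb {
	unsigned char fsid[FSID_SIZE];
	unsigned char metadata_uuid[FSID_SIZE];
};

static bool uuid_eq(const unsigned char *a, const unsigned char *b)
{
	return memcmp(a, b, FSID_SIZE) == 0;
}

/* Subcase 1: the fs still has distinct metadata_uuid and fsid. */
static bool matches_changed(const struct fs_devs *fd, const struct scanned_sb *sb)
{
	return !uuid_eq(fd->metadata_uuid, fd->fsid) &&
		uuid_eq(fd->metadata_uuid, sb->metadata_uuid) &&
	       !uuid_eq(fd->fsid, sb->fsid);
}

/* Subcase 2: the fs was switched back, so both uuids are equal again. */
static bool matches_unchanged(const struct fs_devs *fd, const struct scanned_sb *sb)
{
	return uuid_eq(fd->metadata_uuid, fd->fsid) &&
	       uuid_eq(fd->fsid, sb->metadata_uuid);
}

/* Reverted case: a stale fs_devices whose metadata_uuid equals the disk fsid. */
static bool matches_reverted(const struct fs_devs *fd, const struct scanned_sb *sb)
{
	return !uuid_eq(fd->fsid, fd->metadata_uuid) &&
		uuid_eq(fd->metadata_uuid, sb->fsid) &&
		fd->fsid_change;
}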
@@ -734,24 +784,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 							BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
 
 	if (fsid_change_in_progress) {
-		if (!has_metadata_uuid) {
-			/*
-			 * When we have an image which has CHANGING_FSID_V2 set
-			 * it might belong to either a filesystem which has
-			 * disks with completed fsid change or it might belong
-			 * to fs with no UUID changes in effect, handle both.
-			 */
+		if (!has_metadata_uuid)
 			fs_devices = find_fsid_inprogress(disk_super);
-			if (!fs_devices)
-				fs_devices = find_fsid(disk_super->fsid, NULL);
-		} else {
+		else
 			fs_devices = find_fsid_changed(disk_super);
-		}
 	} else if (has_metadata_uuid) {
-		fs_devices = find_fsid(disk_super->fsid,
-				       disk_super->metadata_uuid);
+		fs_devices = find_fsid_with_metadata_uuid(disk_super);
 	} else {
-		fs_devices = find_fsid(disk_super->fsid, NULL);
+		fs_devices = find_fsid_reverted_metadata(disk_super);
+		if (!fs_devices)
+			fs_devices = find_fsid(disk_super->fsid, NULL);
 	}
@@ -781,12 +823,18 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		 * a device which had the CHANGING_FSID_V2 flag then replace the
 		 * metadata_uuid/fsid values of the fs_devices.
 		 */
-		if (has_metadata_uuid && fs_devices->fsid_change &&
+		if (fs_devices->fsid_change &&
 		    found_transid > fs_devices->latest_generation) {
 			memcpy(fs_devices->fsid, disk_super->fsid,
 					BTRFS_FSID_SIZE);
-			memcpy(fs_devices->metadata_uuid,
-			       disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+
+			if (has_metadata_uuid)
+				memcpy(fs_devices->metadata_uuid,
+				       disk_super->metadata_uuid,
+				       BTRFS_FSID_SIZE);
+			else
+				memcpy(fs_devices->metadata_uuid,
+				       disk_super->fsid, BTRFS_FSID_SIZE);
+
 			fs_devices->fsid_change = false;
 		}
@@ -1064,11 +1112,6 @@ static void btrfs_close_bdev(struct btrfs_device *device)
 static void btrfs_close_one_device(struct btrfs_device *device)
 {
 	struct btrfs_fs_devices *fs_devices = device->fs_devices;
-	struct btrfs_device *new_device;
-	struct rcu_string *name;
-
-	if (device->bdev)
-		fs_devices->open_devices--;
 
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
 	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1080,23 +1123,22 @@ static void btrfs_close_one_device(struct btrfs_device *device)
 		fs_devices->missing_devices--;
 
 	btrfs_close_bdev(device);
-
-	new_device = btrfs_alloc_device(NULL, &device->devid,
-					device->uuid);
-	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
-	/* Safe because we are under uuid_mutex */
-	if (device->name) {
-		name = rcu_string_strdup(device->name->str, GFP_NOFS);
-		BUG_ON(!name); /* -ENOMEM */
-		rcu_assign_pointer(new_device->name, name);
+	if (device->bdev) {
+		fs_devices->open_devices--;
+		device->bdev = NULL;
 	}
+	clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 
-	list_replace_rcu(&device->dev_list, &new_device->dev_list);
-	new_device->fs_devices = device->fs_devices;
+	device->fs_info = NULL;
+	atomic_set(&device->dev_stats_ccnt, 0);
+	extent_io_tree_release(&device->alloc_state);
 
-	synchronize_rcu();
-	btrfs_free_device(device);
+	/* Verify the device is back in a pristine state */
+	ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
+	ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
+	ASSERT(list_empty(&device->dev_alloc_list));
+	ASSERT(list_empty(&device->post_commit_list));
+	ASSERT(atomic_read(&device->reada_in_flight) == 0);
 }
 
 static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -2130,7 +2172,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
 {
 	struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
 
-	WARN_ON(!tgtdev);
 	mutex_lock(&fs_devices->device_list_mutex);
 
 	btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
@@ -2875,6 +2916,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
 	struct btrfs_root *root = fs_info->chunk_root;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_block_group *block_group;
 	int ret;
 
 	/*
@@ -2898,6 +2940,12 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	if (ret)
 		return ret;
 
+	block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
+	if (!block_group)
+		return -ENOENT;
+	btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
+	btrfs_put_block_group(block_group);
+
 	trans = btrfs_start_trans_remove_block_group(root->fs_info,
 						     chunk_offset);
 	if (IS_ERR(trans)) {
@@ -6111,75 +6159,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
-		     u64 physical, u64 **logical, int *naddrs, int *stripe_len)
-{
-	struct extent_map *em;
-	struct map_lookup *map;
-	u64 *buf;
-	u64 bytenr;
-	u64 length;
-	u64 stripe_nr;
-	u64 rmap_len;
-	int i, j, nr = 0;
-
-	em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
-	if (IS_ERR(em))
-		return -EIO;
-
-	map = em->map_lookup;
-	length = em->len;
-	rmap_len = map->stripe_len;
-
-	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
-		length = div_u64(length, map->num_stripes / map->sub_stripes);
-	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-		length = div_u64(length, map->num_stripes);
-	else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-		length = div_u64(length, nr_data_stripes(map));
-		rmap_len = map->stripe_len * nr_data_stripes(map);
-	}
-
-	buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
-	BUG_ON(!buf); /* -ENOMEM */
-
-	for (i = 0; i < map->num_stripes; i++) {
-		if (map->stripes[i].physical > physical ||
-		    map->stripes[i].physical + length <= physical)
-			continue;
-
-		stripe_nr = physical - map->stripes[i].physical;
-		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
-
-		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-			stripe_nr = stripe_nr * map->num_stripes + i;
-			stripe_nr = div_u64(stripe_nr, map->sub_stripes);
-		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-			stripe_nr = stripe_nr * map->num_stripes + i;
-		} /* else if RAID[56], multiply by nr_data_stripes().
-		   * Alternatively, just use rmap_len below instead of
-		   * map->stripe_len */
-
-		bytenr = chunk_start + stripe_nr * rmap_len;
-		WARN_ON(nr >= map->num_stripes);
-		for (j = 0; j < nr; j++) {
-			if (buf[j] == bytenr)
-				break;
-		}
-		if (j == nr) {
-			WARN_ON(nr >= map->num_stripes);
-			buf[nr++] = bytenr;
-		}
-	}
-
-	*logical = buf;
-	*naddrs = nr;
-	*stripe_len = rmap_len;
-
-	free_extent_map(em);
-	return 0;
-}
-
 static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
 {
 	bio->bi_private = bbio->private;
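For reference, the physical-to-logical math at the heart of the function removed above (the series moves it to block-group.c for the new self-tests rather than deleting it outright): the probed physical address is converted to a stripe number within the device, interleaved according to the profile, and scaled back to a chunk-relative logical offset. A worked RAID0 example with invented numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t chunk_start = 1024 * 1024;	/* logical start of the chunk */
	uint64_t stripe_len  = 65536;
	uint64_t stripe_phys = 2 * 1024 * 1024;	/* stripe i starts here on disk */
	int	 num_stripes = 2, i = 1;	/* 2-device RAID0, second stripe */
	uint64_t physical = stripe_phys + 3 * stripe_len;	/* probe address */

	uint64_t stripe_nr = (physical - stripe_phys) / stripe_len;	/* 3 */
	stripe_nr = stripe_nr * num_stripes + i;	/* RAID0 interleave: 7 */
	uint64_t bytenr = chunk_start + stripe_nr * stripe_len;

	/* 1 MiB + 7 * 64 KiB = 1507328 */
	printf("logical = %llu\n", (unsigned long long)bytenr);
	return 0;
}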
@@ -6480,19 +6459,14 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
 {
 	int index = btrfs_bg_flags_to_raid_index(type);
 	int ncopies = btrfs_raid_array[index].ncopies;
+	const int nparity = btrfs_raid_array[index].nparity;
 	int data_stripes;
 
-	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
-	case BTRFS_BLOCK_GROUP_RAID5:
-		data_stripes = num_stripes - 1;
-		break;
-	case BTRFS_BLOCK_GROUP_RAID6:
-		data_stripes = num_stripes - 2;
-		break;
-	default:
+	if (nparity)
+		data_stripes = num_stripes - nparity;
+	else
 		data_stripes = num_stripes / ncopies;
-		break;
-	}
+
 	return div_u64(chunk_len, data_stripes);
 }
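A quick userspace check of the table-driven version above: parity profiles subtract their parity stripes, everything else divides by the copy count. div_u64 is replaced by plain division, and the inputs are invented:

#include <stdint.h>
#include <stdio.h>

static uint64_t calc_stripe_length(int ncopies, int nparity,
				   uint64_t chunk_len, int num_stripes)
{
	int data_stripes;

	if (nparity)
		data_stripes = num_stripes - nparity;
	else
		data_stripes = num_stripes / ncopies;

	return chunk_len / data_stripes;
}

int main(void)
{
	/* RAID6 (nparity = 2) over 6 devices: 4 data stripes -> 1 GiB each */
	printf("%llu\n", (unsigned long long)
	       calc_stripe_length(1, 2, 4ULL << 30, 6));
	/* RAID1 (ncopies = 2) over 2 devices: 1 data stripe -> 1 GiB */
	printf("%llu\n", (unsigned long long)
	       calc_stripe_length(2, 0, 1ULL << 30, 2));
	return 0;
}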
@@ -7331,6 +7305,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
 			else
 				btrfs_dev_stat_set(dev, i, 0);
 		}
+		btrfs_info(fs_info, "device stats zeroed by %s (%d)",
+			   current->comm, task_pid_nr(current));
 	} else {
 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
 			if (stats->nr_items > i)
fs/btrfs/volumes.h

@@ -120,8 +120,6 @@ struct btrfs_device {
 	/* per-device scrub information */
 	struct scrub_ctx *scrub_ctx;
 
-	struct btrfs_work work;
-
 	/* readahead state */
 	atomic_t reada_in_flight;
 	u64 reada_next;
@@ -138,6 +136,10 @@ struct btrfs_device {
 	atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
 
 	struct extent_io_tree alloc_state;
+
+	struct completion kobj_unregister;
+	/* For sysfs/FSID/devinfo/devid/ */
+	struct kobject devid_kobj;
 };
 
 /*
@@ -255,7 +257,7 @@ struct btrfs_fs_devices {
 	struct btrfs_fs_info *fs_info;
 	/* sysfs kobjects */
 	struct kobject fsid_kobj;
-	struct kobject *device_dir_kobj;
+	struct kobject *devices_kobj;
 	struct completion kobj_unregister;
 };
 
@@ -417,8 +419,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     struct btrfs_bio **bbio_ret);
 int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
-		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);
 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
include/linux/bitmap.h

@@ -456,6 +456,41 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen,
 	return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits);
 }
 
+static inline void bitmap_next_clear_region(unsigned long *bitmap,
+					    unsigned int *rs, unsigned int *re,
+					    unsigned int end)
+{
+	*rs = find_next_zero_bit(bitmap, end, *rs);
+	*re = find_next_bit(bitmap, end, *rs + 1);
+}
+
+static inline void bitmap_next_set_region(unsigned long *bitmap,
+					  unsigned int *rs, unsigned int *re,
+					  unsigned int end)
+{
+	*rs = find_next_bit(bitmap, end, *rs);
+	*re = find_next_zero_bit(bitmap, end, *rs + 1);
+}
+
+/*
+ * Bitmap region iterators. Iterates over the bitmap between [@start, @end).
+ * @rs and @re should be integer variables and will be set to start and end
+ * index of the current clear or set region.
+ */
+#define bitmap_for_each_clear_region(bitmap, rs, re, start, end)	     \
+	for ((rs) = (start),						     \
+	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end));	     \
+	     (rs) < (re);						     \
+	     (rs) = (re) + 1,						     \
+	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end)))
+
+#define bitmap_for_each_set_region(bitmap, rs, re, start, end)		     \
+	for ((rs) = (start),						     \
+	     bitmap_next_set_region((bitmap), &(rs), &(re), (end));	     \
+	     (rs) < (re);						     \
+	     (rs) = (re) + 1,						     \
+	     bitmap_next_set_region((bitmap), &(rs), &(re), (end)))
+
 /**
  * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
  * @n: u64 value
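The new iterators above walk a bitmap as maximal runs of set (or clear) bits: find the start of a run, find the first opposite bit after it, yield [rs, re), then resume past the run. A userspace rerun of that pattern, with the kernel's find_next_bit()/find_next_zero_bit() stubbed by a naive scan and an invented bitmap value:

#include <stdio.h>

#define BITS 32
static unsigned long bm = 0x3806UL;	/* set bits: 1, 2, 11, 12, 13 */

/* Naive stand-in for find_next_bit()/find_next_zero_bit(). */
static unsigned int find_from(unsigned int i, int want_set)
{
	for (; i < BITS; i++)
		if ((int)((bm >> i) & 1UL) == want_set)
			break;
	return i;
}

int main(void)
{
	/* Mirrors bitmap_for_each_set_region(): yields [1,3) then [11,14). */
	unsigned int rs = 0, re;

	while (1) {
		rs = find_from(rs, 1);		/* start of a set run      */
		re = find_from(rs + 1, 0);	/* first clear bit after it */
		if (rs >= BITS)
			break;
		printf("set region [%u, %u)\n", rs, re);
		rs = re + 1;
	}
	return 0;
}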
include/trace/events/btrfs.h

@@ -496,9 +496,9 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
 	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
 		__entry->ino		= btrfs_ino(BTRFS_I(inode));
 		__entry->file_offset	= ordered->file_offset;
-		__entry->start		= ordered->start;
-		__entry->len		= ordered->len;
-		__entry->disk_len	= ordered->disk_len;
+		__entry->start		= ordered->disk_bytenr;
+		__entry->len		= ordered->num_bytes;
+		__entry->disk_len	= ordered->disk_num_bytes;
 		__entry->bytes_left	= ordered->bytes_left;
 		__entry->flags		= ordered->flags;
 		__entry->compress_type	= ordered->compress_type;
mm/percpu.c
@@ -270,33 +270,6 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 	       pcpu_unit_page_offset(cpu, page_idx);
 }
 
-static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
-{
-	*rs = find_next_zero_bit(bitmap, end, *rs);
-	*re = find_next_bit(bitmap, end, *rs + 1);
-}
-
-static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
-{
-	*rs = find_next_bit(bitmap, end, *rs);
-	*re = find_next_zero_bit(bitmap, end, *rs + 1);
-}
-
-/*
- * Bitmap region iterators. Iterates over the bitmap between
- * [@start, @end) in @chunk. @rs and @re should be integer variables
- * and will be set to start and end index of the current free region.
- */
-#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end)		     \
-	for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
-	     (rs) < (re);						     \
-	     (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))
-
-#define pcpu_for_each_pop_region(bitmap, rs, re, start, end)		     \
-	for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end));   \
-	     (rs) < (re);						     \
-	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))
-
 /*
  * The following are helper functions to help access bitmaps and convert
  * between bitmap offsets to address offsets.
@@ -732,9 +705,8 @@ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
 	}
 
 	bits = 0;
-	pcpu_for_each_md_free_region(chunk, bit_off, bits) {
+	pcpu_for_each_md_free_region(chunk, bit_off, bits)
 		pcpu_block_update(chunk_md, bit_off, bit_off + bits);
-	}
 }
 
 /**
@@ -749,7 +721,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
 {
 	struct pcpu_block_md *block = chunk->md_blocks + index;
 	unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
-	int rs, re, start;	/* region start, region end */
+	unsigned int rs, re, start;	/* region start, region end */
 
 	/* promote scan_hint to contig_hint */
 	if (block->scan_hint) {
@@ -765,10 +737,9 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
 	block->right_free = 0;
 
 	/* iterate over free areas and update the contig hints */
-	pcpu_for_each_unpop_region(alloc_map, rs, re, start,
-				   PCPU_BITMAP_BLOCK_BITS) {
+	bitmap_for_each_clear_region(alloc_map, rs, re, start,
+				     PCPU_BITMAP_BLOCK_BITS)
 		pcpu_block_update(block, rs, re);
-	}
 }
 
 /**
@@ -1041,13 +1012,13 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
 static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
 			      int *next_off)
 {
-	int page_start, page_end, rs, re;
+	unsigned int page_start, page_end, rs, re;
 
 	page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
 	page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
 
 	rs = page_start;
-	pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
+	bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
 	if (rs >= page_end)
 		return true;
 
@@ -1702,13 +1673,13 @@ area_found:
 
 	/* populate if not all pages are already there */
 	if (!is_atomic) {
-		int page_start, page_end, rs, re;
+		unsigned int page_start, page_end, rs, re;
 
 		page_start = PFN_DOWN(off);
 		page_end = PFN_UP(off + size);
 
-		pcpu_for_each_unpop_region(chunk->populated, rs, re,
-					   page_start, page_end) {
+		bitmap_for_each_clear_region(chunk->populated, rs, re,
+					     page_start, page_end) {
 			WARN_ON(chunk->immutable);
 
 			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
@@ -1858,10 +1829,10 @@ static void pcpu_balance_workfn(struct work_struct *work)
 	spin_unlock_irq(&pcpu_lock);
 
 	list_for_each_entry_safe(chunk, next, &to_free, list) {
-		int rs, re;
+		unsigned int rs, re;
 
-		pcpu_for_each_pop_region(chunk->populated, rs, re, 0,
-					 chunk->nr_pages) {
+		bitmap_for_each_set_region(chunk->populated, rs, re, 0,
+					   chunk->nr_pages) {
 			pcpu_depopulate_chunk(chunk, rs, re);
 			spin_lock_irq(&pcpu_lock);
 			pcpu_chunk_depopulated(chunk, rs, re);
@@ -1893,7 +1864,7 @@ retry_pop:
 	}
 
 	for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
-		int nr_unpop = 0, rs, re;
+		unsigned int nr_unpop = 0, rs, re;
 
 		if (!nr_to_pop)
 			break;
@@ -1910,9 +1881,9 @@ retry_pop:
 			continue;
 
 		/* @chunk can't go away while pcpu_alloc_mutex is held */
-		pcpu_for_each_unpop_region(chunk->populated, rs, re, 0,
-					   chunk->nr_pages) {
-			int nr = min(re - rs, nr_to_pop);
+		bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
+					     chunk->nr_pages) {
+			int nr = min_t(int, re - rs, nr_to_pop);
 
 			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 			if (!ret) {