forked from Minki/linux
for-5.3-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl0sNWYACgkQxWXV+ddt WDsyQA/8CGnF68g6hwVuYz4K7f39gOiFlBnRxeN/3RT6vkNSyLZxvRDaDrSTzVIo cz2G/9qZLXsIll+3EfZlyzZZiA+4f4hEDAfAd4yVPavRom+uu7dbqzAIpgvFlYdH vhAYKOeWSqWElWJ06hzWO3FCwjY9GKFMk4PS0XHHp+STCT0hq1MkaHr44kiHsqdh T5nVGDwXz8nGDZ51RO6+mgiSrd5eHbs6kXCd8rW7hmjTx8ClKHa1tdkxN/us+pJm hTFT669m5ckHhY2AUKmkREoOwpnt2HcXQJNkz6gO+o03IDvYz73SScbhSYdNTlwi j74GLf89FA52qVM+JDg9MaWYqgf1pQI8AHK/rXw2FNbuP/eL9kuZ85ZIbO6CiO0c 5jAixReSwzSP/V0+MKW3F7k4KtIqbHAV6mkI8zLwrAee4Xj81BOtgL7gYPFQTwSZ ma0hEoen7IV5+/z9upUuLA5wr4BT+h1T+EllCWe1+9+9mRYOvowtkRNBL8HZWTDI b65oTITfot54xX9ecKtiuG2qoqJEjjkR+YKdRM4nph6wflSNZxEoezBp3iRFpYOL Lx+g97RcJ2EEoBVjVMkTqfj93GeiKRifa8yXdRY+A0I2ZXZEcS8DjSJM6rj3AOPy 4idIl+ABscayZowfqu0FSIULf1La0qiRXmbGNeG4ylhN4L6S/og= =eshk -----END PGP SIGNATURE----- Merge tag 'for-5.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "Highlights: - chunks that have been trimmed and unchanged since last mount are tracked and skipped on repeated trims - use hw assissed crc32c on more arches, speedups if native instructions or optimized implementation is available - the RAID56 incompat bit is automatically removed when the last block group of that type is removed Fixes: - fsync fix for reflink on NODATACOW files that could lead to ENOSPC - fix data loss after inode eviction, renaming it, and fsync it - fix fsync not persisting dentry deletions due to inode evictions - update ctime/mtime/iversion after hole punching - fix compression type validation (reported by KASAN) - send won't be allowed to start when relocation is in progress, this can cause spurious errors or produce incorrect send stream Core: - new tracepoints for space update - tree-checker: better check for end of extents for some tree items - preparatory work for more checksum algorithms - run delayed iput at unlink time and don't push the work to cleaner thread where it's not properly throttled - wrap block mapping to structures and helpers, base for further refactoring - split large files, part 1: - space info handling - block group reservations - delayed refs - delayed allocation - other cleanups and refactoring" * tag 'for-5.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (103 commits) btrfs: fix memory leak of path on error return path btrfs: move the subvolume reservation stuff out of extent-tree.c btrfs: migrate the delalloc space stuff to it's own home btrfs: migrate btrfs_trans_release_chunk_metadata btrfs: migrate the delayed refs rsv code btrfs: Evaluate io_tree in find_lock_delalloc_range() btrfs: migrate the global_block_rsv helpers to block-rsv.c btrfs: migrate the block-rsv code to block-rsv.c btrfs: stop using block_rsv_release_bytes everywhere btrfs: cleanup the target logic in __btrfs_block_rsv_release btrfs: export __btrfs_block_rsv_release btrfs: export btrfs_block_rsv_add_bytes btrfs: move btrfs_block_rsv definitions into it's own header btrfs: Simplify update of space_info in __reserve_metadata_bytes() btrfs: unexport can_overcommit btrfs: move reserve_metadata_bytes and supporting code to space-info.c btrfs: move dump_space_info to space-info.c btrfs: export block_rsv_use_bytes btrfs: move btrfs_space_info_add_*_bytes to space-info.c btrfs: move the space info update macro to space-info.h ...
This commit is contained in:
commit
a18f877541
@ -2,7 +2,8 @@
|
||||
|
||||
config BTRFS_FS
|
||||
tristate "Btrfs filesystem support"
|
||||
select LIBCRC32C
|
||||
select CRYPTO
|
||||
select CRYPTO_CRC32C
|
||||
select ZLIB_INFLATE
|
||||
select ZLIB_DEFLATE
|
||||
select LZO_COMPRESS
|
||||
|
@ -10,7 +10,8 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
|
||||
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
|
||||
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
|
||||
uuid-tree.o props.o free-space-tree.o tree-checker.o
|
||||
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
||||
block-rsv.o delalloc-space.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
|
||||
|
@ -1465,12 +1465,11 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
*
|
||||
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
|
||||
*/
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
struct ulist *roots, struct ulist *tmp)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct ulist *tmp = NULL;
|
||||
struct ulist *roots = NULL;
|
||||
struct ulist_iterator uiter;
|
||||
struct ulist_node *node;
|
||||
struct seq_list elem = SEQ_LIST_INIT(elem);
|
||||
@ -1481,12 +1480,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
|
||||
.share_count = 0,
|
||||
};
|
||||
|
||||
tmp = ulist_alloc(GFP_NOFS);
|
||||
roots = ulist_alloc(GFP_NOFS);
|
||||
if (!tmp || !roots) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ulist_init(roots);
|
||||
ulist_init(tmp);
|
||||
|
||||
trans = btrfs_attach_transaction(root);
|
||||
if (IS_ERR(trans)) {
|
||||
@ -1527,8 +1522,8 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
}
|
||||
out:
|
||||
ulist_free(tmp);
|
||||
ulist_free(roots);
|
||||
ulist_release(roots);
|
||||
ulist_release(tmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -57,7 +57,8 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
|
||||
u64 start_off, struct btrfs_path *path,
|
||||
struct btrfs_inode_extref **ret_extref,
|
||||
u64 *found_off);
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr);
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
struct ulist *roots, struct ulist *tmp_ulist);
|
||||
|
||||
int __init btrfs_prelim_ref_init(void);
|
||||
void __cold btrfs_prelim_ref_exit(void);
|
||||
|
425
fs/btrfs/block-rsv.c
Normal file
425
fs/btrfs/block-rsv.c
Normal file
@ -0,0 +1,425 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "ctree.h"
|
||||
#include "block-rsv.h"
|
||||
#include "space-info.h"
|
||||
#include "math.h"
|
||||
#include "transaction.h"
|
||||
|
||||
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
u64 *qgroup_to_release_ret)
|
||||
{
|
||||
struct btrfs_space_info *space_info = block_rsv->space_info;
|
||||
u64 qgroup_to_release = 0;
|
||||
u64 ret;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
if (num_bytes == (u64)-1) {
|
||||
num_bytes = block_rsv->size;
|
||||
qgroup_to_release = block_rsv->qgroup_rsv_size;
|
||||
}
|
||||
block_rsv->size -= num_bytes;
|
||||
if (block_rsv->reserved >= block_rsv->size) {
|
||||
num_bytes = block_rsv->reserved - block_rsv->size;
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
block_rsv->full = 1;
|
||||
} else {
|
||||
num_bytes = 0;
|
||||
}
|
||||
if (block_rsv->qgroup_rsv_reserved >= block_rsv->qgroup_rsv_size) {
|
||||
qgroup_to_release = block_rsv->qgroup_rsv_reserved -
|
||||
block_rsv->qgroup_rsv_size;
|
||||
block_rsv->qgroup_rsv_reserved = block_rsv->qgroup_rsv_size;
|
||||
} else {
|
||||
qgroup_to_release = 0;
|
||||
}
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
ret = num_bytes;
|
||||
if (num_bytes > 0) {
|
||||
if (dest) {
|
||||
spin_lock(&dest->lock);
|
||||
if (!dest->full) {
|
||||
u64 bytes_to_add;
|
||||
|
||||
bytes_to_add = dest->size - dest->reserved;
|
||||
bytes_to_add = min(num_bytes, bytes_to_add);
|
||||
dest->reserved += bytes_to_add;
|
||||
if (dest->reserved >= dest->size)
|
||||
dest->full = 1;
|
||||
num_bytes -= bytes_to_add;
|
||||
}
|
||||
spin_unlock(&dest->lock);
|
||||
}
|
||||
if (num_bytes)
|
||||
btrfs_space_info_add_old_bytes(fs_info, space_info,
|
||||
num_bytes);
|
||||
}
|
||||
if (qgroup_to_release_ret)
|
||||
*qgroup_to_release_ret = qgroup_to_release;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src,
|
||||
struct btrfs_block_rsv *dst, u64 num_bytes,
|
||||
bool update_size)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = btrfs_block_rsv_use_bytes(src, num_bytes);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
btrfs_block_rsv_add_bytes(dst, num_bytes, update_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
|
||||
{
|
||||
memset(rsv, 0, sizeof(*rsv));
|
||||
spin_lock_init(&rsv->lock);
|
||||
rsv->type = type;
|
||||
}
|
||||
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type)
|
||||
{
|
||||
btrfs_init_block_rsv(rsv, type);
|
||||
rsv->space_info = btrfs_find_space_info(fs_info,
|
||||
BTRFS_BLOCK_GROUP_METADATA);
|
||||
}
|
||||
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv;
|
||||
|
||||
block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
|
||||
if (!block_rsv)
|
||||
return NULL;
|
||||
|
||||
btrfs_init_metadata_block_rsv(fs_info, block_rsv, type);
|
||||
return block_rsv;
|
||||
}
|
||||
|
||||
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv)
|
||||
{
|
||||
if (!rsv)
|
||||
return;
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
|
||||
kfree(rsv);
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_add(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (num_bytes == 0)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
||||
if (!ret)
|
||||
btrfs_block_rsv_add_bytes(block_rsv, num_bytes, true);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor)
|
||||
{
|
||||
u64 num_bytes = 0;
|
||||
int ret = -ENOSPC;
|
||||
|
||||
if (!block_rsv)
|
||||
return 0;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
num_bytes = div_factor(block_rsv->size, min_factor);
|
||||
if (block_rsv->reserved >= num_bytes)
|
||||
ret = 0;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_refill(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
u64 num_bytes = 0;
|
||||
int ret = -ENOSPC;
|
||||
|
||||
if (!block_rsv)
|
||||
return 0;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
num_bytes = min_reserved;
|
||||
if (block_rsv->reserved >= num_bytes)
|
||||
ret = 0;
|
||||
else
|
||||
num_bytes -= block_rsv->reserved;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
|
||||
if (!ret) {
|
||||
btrfs_block_rsv_add_bytes(block_rsv, num_bytes, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, u64 *qgroup_to_release)
|
||||
{
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
|
||||
struct btrfs_block_rsv *target = NULL;
|
||||
|
||||
/*
|
||||
* If we are the delayed_rsv then push to the global rsv, otherwise dump
|
||||
* into the delayed rsv if it is not full.
|
||||
*/
|
||||
if (block_rsv == delayed_rsv)
|
||||
target = global_rsv;
|
||||
else if (block_rsv != global_rsv && !delayed_rsv->full)
|
||||
target = delayed_rsv;
|
||||
|
||||
if (target && block_rsv->space_info != target->space_info)
|
||||
target = NULL;
|
||||
|
||||
return block_rsv_release_bytes(fs_info, block_rsv, target, num_bytes,
|
||||
qgroup_to_release);
|
||||
}
|
||||
|
||||
int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes)
|
||||
{
|
||||
int ret = -ENOSPC;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
if (block_rsv->reserved >= num_bytes) {
|
||||
block_rsv->reserved -= num_bytes;
|
||||
if (block_rsv->reserved < block_rsv->size)
|
||||
block_rsv->full = 0;
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&block_rsv->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, bool update_size)
|
||||
{
|
||||
spin_lock(&block_rsv->lock);
|
||||
block_rsv->reserved += num_bytes;
|
||||
if (update_size)
|
||||
block_rsv->size += num_bytes;
|
||||
else if (block_rsv->reserved >= block_rsv->size)
|
||||
block_rsv->full = 1;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
}
|
||||
|
||||
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
int min_factor)
|
||||
{
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
u64 min_bytes;
|
||||
|
||||
if (global_rsv->space_info != dest->space_info)
|
||||
return -ENOSPC;
|
||||
|
||||
spin_lock(&global_rsv->lock);
|
||||
min_bytes = div_factor(global_rsv->size, min_factor);
|
||||
if (global_rsv->reserved < min_bytes + num_bytes) {
|
||||
spin_unlock(&global_rsv->lock);
|
||||
return -ENOSPC;
|
||||
}
|
||||
global_rsv->reserved -= num_bytes;
|
||||
if (global_rsv->reserved < global_rsv->size)
|
||||
global_rsv->full = 0;
|
||||
spin_unlock(&global_rsv->lock);
|
||||
|
||||
btrfs_block_rsv_add_bytes(dest, num_bytes, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
|
||||
struct btrfs_space_info *sinfo = block_rsv->space_info;
|
||||
u64 num_bytes;
|
||||
|
||||
/*
|
||||
* The global block rsv is based on the size of the extent tree, the
|
||||
* checksum tree and the root tree. If the fs is empty we want to set
|
||||
* it to a minimal amount for safety.
|
||||
*/
|
||||
num_bytes = btrfs_root_used(&fs_info->extent_root->root_item) +
|
||||
btrfs_root_used(&fs_info->csum_root->root_item) +
|
||||
btrfs_root_used(&fs_info->tree_root->root_item);
|
||||
num_bytes = max_t(u64, num_bytes, SZ_16M);
|
||||
|
||||
spin_lock(&sinfo->lock);
|
||||
spin_lock(&block_rsv->lock);
|
||||
|
||||
block_rsv->size = min_t(u64, num_bytes, SZ_512M);
|
||||
|
||||
if (block_rsv->reserved < block_rsv->size) {
|
||||
num_bytes = btrfs_space_info_used(sinfo, true);
|
||||
if (sinfo->total_bytes > num_bytes) {
|
||||
num_bytes = sinfo->total_bytes - num_bytes;
|
||||
num_bytes = min(num_bytes,
|
||||
block_rsv->size - block_rsv->reserved);
|
||||
block_rsv->reserved += num_bytes;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
num_bytes);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
sinfo->flags, num_bytes,
|
||||
1);
|
||||
}
|
||||
} else if (block_rsv->reserved > block_rsv->size) {
|
||||
num_bytes = block_rsv->reserved - block_rsv->size;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
-num_bytes);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
sinfo->flags, num_bytes, 0);
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
}
|
||||
|
||||
if (block_rsv->reserved == block_rsv->size)
|
||||
block_rsv->full = 1;
|
||||
else
|
||||
block_rsv->full = 0;
|
||||
|
||||
spin_unlock(&block_rsv->lock);
|
||||
spin_unlock(&sinfo->lock);
|
||||
}
|
||||
|
||||
void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_space_info *space_info;
|
||||
|
||||
space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
|
||||
fs_info->chunk_block_rsv.space_info = space_info;
|
||||
|
||||
space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
||||
fs_info->global_block_rsv.space_info = space_info;
|
||||
fs_info->trans_block_rsv.space_info = space_info;
|
||||
fs_info->empty_block_rsv.space_info = space_info;
|
||||
fs_info->delayed_block_rsv.space_info = space_info;
|
||||
fs_info->delayed_refs_rsv.space_info = space_info;
|
||||
|
||||
fs_info->extent_root->block_rsv = &fs_info->delayed_refs_rsv;
|
||||
fs_info->csum_root->block_rsv = &fs_info->delayed_refs_rsv;
|
||||
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
|
||||
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
|
||||
if (fs_info->quota_root)
|
||||
fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
|
||||
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
|
||||
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
}
|
||||
|
||||
void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
|
||||
WARN_ON(fs_info->trans_block_rsv.size > 0);
|
||||
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
|
||||
WARN_ON(fs_info->chunk_block_rsv.size > 0);
|
||||
WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
|
||||
WARN_ON(fs_info->delayed_block_rsv.size > 0);
|
||||
WARN_ON(fs_info->delayed_block_rsv.reserved > 0);
|
||||
WARN_ON(fs_info->delayed_refs_rsv.reserved > 0);
|
||||
WARN_ON(fs_info->delayed_refs_rsv.size > 0);
|
||||
}
|
||||
|
||||
static struct btrfs_block_rsv *get_block_rsv(
|
||||
const struct btrfs_trans_handle *trans,
|
||||
const struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *block_rsv = NULL;
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
|
||||
(root == fs_info->csum_root && trans->adding_csums) ||
|
||||
(root == fs_info->uuid_root))
|
||||
block_rsv = trans->block_rsv;
|
||||
|
||||
if (!block_rsv)
|
||||
block_rsv = root->block_rsv;
|
||||
|
||||
if (!block_rsv)
|
||||
block_rsv = &fs_info->empty_block_rsv;
|
||||
|
||||
return block_rsv;
|
||||
}
|
||||
|
||||
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u32 blocksize)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *block_rsv;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
int ret;
|
||||
bool global_updated = false;
|
||||
|
||||
block_rsv = get_block_rsv(trans, root);
|
||||
|
||||
if (unlikely(block_rsv->size == 0))
|
||||
goto try_reserve;
|
||||
again:
|
||||
ret = btrfs_block_rsv_use_bytes(block_rsv, blocksize);
|
||||
if (!ret)
|
||||
return block_rsv;
|
||||
|
||||
if (block_rsv->failfast)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
|
||||
global_updated = true;
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* The global reserve still exists to save us from ourselves, so don't
|
||||
* warn_on if we are short on our delayed refs reserve.
|
||||
*/
|
||||
if (block_rsv->type != BTRFS_BLOCK_RSV_DELREFS &&
|
||||
btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
|
||||
static DEFINE_RATELIMIT_STATE(_rs,
|
||||
DEFAULT_RATELIMIT_INTERVAL * 10,
|
||||
/*DEFAULT_RATELIMIT_BURST*/ 1);
|
||||
if (__ratelimit(&_rs))
|
||||
WARN(1, KERN_DEBUG
|
||||
"BTRFS: block rsv returned %d\n", ret);
|
||||
}
|
||||
try_reserve:
|
||||
ret = btrfs_reserve_metadata_bytes(root, block_rsv, blocksize,
|
||||
BTRFS_RESERVE_NO_FLUSH);
|
||||
if (!ret)
|
||||
return block_rsv;
|
||||
/*
|
||||
* If we couldn't reserve metadata bytes try and use some from
|
||||
* the global reserve if its space type is the same as the global
|
||||
* reservation.
|
||||
*/
|
||||
if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
|
||||
block_rsv->space_info == global_rsv->space_info) {
|
||||
ret = btrfs_block_rsv_use_bytes(global_rsv, blocksize);
|
||||
if (!ret)
|
||||
return global_rsv;
|
||||
}
|
||||
return ERR_PTR(ret);
|
||||
}
|
101
fs/btrfs/block-rsv.h
Normal file
101
fs/btrfs/block-rsv.h
Normal file
@ -0,0 +1,101 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_BLOCK_RSV_H
|
||||
#define BTRFS_BLOCK_RSV_H
|
||||
|
||||
struct btrfs_trans_handle;
|
||||
enum btrfs_reserve_flush_enum;
|
||||
|
||||
/*
|
||||
* Types of block reserves
|
||||
*/
|
||||
enum {
|
||||
BTRFS_BLOCK_RSV_GLOBAL,
|
||||
BTRFS_BLOCK_RSV_DELALLOC,
|
||||
BTRFS_BLOCK_RSV_TRANS,
|
||||
BTRFS_BLOCK_RSV_CHUNK,
|
||||
BTRFS_BLOCK_RSV_DELOPS,
|
||||
BTRFS_BLOCK_RSV_DELREFS,
|
||||
BTRFS_BLOCK_RSV_EMPTY,
|
||||
BTRFS_BLOCK_RSV_TEMP,
|
||||
};
|
||||
|
||||
struct btrfs_block_rsv {
|
||||
u64 size;
|
||||
u64 reserved;
|
||||
struct btrfs_space_info *space_info;
|
||||
spinlock_t lock;
|
||||
unsigned short full;
|
||||
unsigned short type;
|
||||
unsigned short failfast;
|
||||
|
||||
/*
|
||||
* Qgroup equivalent for @size @reserved
|
||||
*
|
||||
* Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
|
||||
* about things like csum size nor how many tree blocks it will need to
|
||||
* reserve.
|
||||
*
|
||||
* Qgroup cares more about net change of the extent usage.
|
||||
*
|
||||
* So for one newly inserted file extent, in worst case it will cause
|
||||
* leaf split and level increase, nodesize for each file extent is
|
||||
* already too much.
|
||||
*
|
||||
* In short, qgroup_size/reserved is the upper limit of possible needed
|
||||
* qgroup metadata reservation.
|
||||
*/
|
||||
u64 qgroup_rsv_size;
|
||||
u64 qgroup_rsv_reserved;
|
||||
};
|
||||
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type);
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type);
|
||||
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
int btrfs_block_rsv_add(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
|
||||
int btrfs_block_rsv_refill(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
|
||||
struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
|
||||
bool update_size);
|
||||
int btrfs_block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes);
|
||||
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
int min_factor);
|
||||
void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, bool update_size);
|
||||
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, u64 *qgroup_to_release);
|
||||
void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u32 blocksize);
|
||||
|
||||
static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes)
|
||||
{
|
||||
__btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
|
||||
}
|
||||
|
||||
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u32 blocksize)
|
||||
{
|
||||
btrfs_block_rsv_add_bytes(block_rsv, blocksize, false);
|
||||
btrfs_block_rsv_release(fs_info, block_rsv, 0);
|
||||
}
|
||||
|
||||
#endif /* BTRFS_BLOCK_RSV_H */
|
@ -337,22 +337,34 @@ static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
|
||||
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
|
||||
}
|
||||
|
||||
/* Array of bytes with variable length, hexadecimal format 0x1234 */
|
||||
#define CSUM_FMT "0x%*phN"
|
||||
#define CSUM_FMT_VALUE(size, bytes) size, bytes
|
||||
|
||||
static inline void btrfs_print_data_csum_error(struct btrfs_inode *inode,
|
||||
u64 logical_start, u32 csum, u32 csum_expected, int mirror_num)
|
||||
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_super_block *sb = root->fs_info->super_copy;
|
||||
const u16 csum_size = btrfs_super_csum_size(sb);
|
||||
|
||||
/* Output minus objectid, which is more meaningful */
|
||||
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID)
|
||||
btrfs_warn_rl(root->fs_info,
|
||||
"csum failed root %lld ino %lld off %llu csum 0x%08x expected csum 0x%08x mirror %d",
|
||||
"csum failed root %lld ino %lld off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
|
||||
root->root_key.objectid, btrfs_ino(inode),
|
||||
logical_start, csum, csum_expected, mirror_num);
|
||||
logical_start,
|
||||
CSUM_FMT_VALUE(csum_size, csum),
|
||||
CSUM_FMT_VALUE(csum_size, csum_expected),
|
||||
mirror_num);
|
||||
else
|
||||
btrfs_warn_rl(root->fs_info,
|
||||
"csum failed root %llu ino %llu off %llu csum 0x%08x expected csum 0x%08x mirror %d",
|
||||
"csum failed root %llu ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
|
||||
root->root_key.objectid, btrfs_ino(inode),
|
||||
logical_start, csum, csum_expected, mirror_num);
|
||||
logical_start,
|
||||
CSUM_FMT_VALUE(csum_size, csum),
|
||||
CSUM_FMT_VALUE(csum_size, csum_expected),
|
||||
mirror_num);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -83,7 +83,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -1710,9 +1710,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
|
||||
char **datav, unsigned int num_pages)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = state->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
struct btrfs_header *h;
|
||||
u8 csum[BTRFS_CSUM_SIZE];
|
||||
u32 crc = ~(u32)0;
|
||||
unsigned int i;
|
||||
|
||||
if (num_pages * PAGE_SIZE < state->metablock_size)
|
||||
@ -1723,14 +1723,17 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
|
||||
if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE))
|
||||
return 1;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
|
||||
size_t sublen = i ? PAGE_SIZE :
|
||||
(PAGE_SIZE - BTRFS_CSUM_SIZE);
|
||||
|
||||
crc = crc32c(crc, data, sublen);
|
||||
crypto_shash_update(shash, data, sublen);
|
||||
}
|
||||
btrfs_csum_final(crc, csum);
|
||||
crypto_shash_final(shash, csum);
|
||||
if (memcmp(csum, h->csum, state->csum_size))
|
||||
return 1;
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/log2.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -42,6 +43,22 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool btrfs_compress_is_valid_type(const char *str, size_t len)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < ARRAY_SIZE(btrfs_compress_types); i++) {
|
||||
size_t comp_len = strlen(btrfs_compress_types[i]);
|
||||
|
||||
if (len < comp_len)
|
||||
continue;
|
||||
|
||||
if (!strncmp(btrfs_compress_types[i], str, comp_len))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int btrfs_decompress_bio(struct compressed_bio *cb);
|
||||
|
||||
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
|
||||
@ -57,32 +74,37 @@ static int check_compressed_csum(struct btrfs_inode *inode,
|
||||
struct compressed_bio *cb,
|
||||
u64 disk_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
int ret;
|
||||
struct page *page;
|
||||
unsigned long i;
|
||||
char *kaddr;
|
||||
u32 csum;
|
||||
u32 *cb_sum = &cb->sums;
|
||||
u8 csum[BTRFS_CSUM_SIZE];
|
||||
u8 *cb_sum = cb->sums;
|
||||
|
||||
if (inode->flags & BTRFS_INODE_NODATASUM)
|
||||
return 0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
for (i = 0; i < cb->nr_pages; i++) {
|
||||
page = cb->compressed_pages[i];
|
||||
csum = ~(u32)0;
|
||||
|
||||
crypto_shash_init(shash);
|
||||
kaddr = kmap_atomic(page);
|
||||
csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
|
||||
btrfs_csum_final(csum, (u8 *)&csum);
|
||||
crypto_shash_update(shash, kaddr, PAGE_SIZE);
|
||||
kunmap_atomic(kaddr);
|
||||
crypto_shash_final(shash, (u8 *)&csum);
|
||||
|
||||
if (csum != *cb_sum) {
|
||||
btrfs_print_data_csum_error(inode, disk_start, csum,
|
||||
*cb_sum, cb->mirror_num);
|
||||
if (memcmp(&csum, cb_sum, csum_size)) {
|
||||
btrfs_print_data_csum_error(inode, disk_start,
|
||||
csum, cb_sum, cb->mirror_num);
|
||||
ret = -EIO;
|
||||
goto fail;
|
||||
}
|
||||
cb_sum++;
|
||||
cb_sum += csum_size;
|
||||
|
||||
}
|
||||
ret = 0;
|
||||
@ -318,7 +340,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
|
||||
bdev = fs_info->fs_devices->latest_bdev;
|
||||
|
||||
bio = btrfs_bio_alloc(bdev, first_byte);
|
||||
bio = btrfs_bio_alloc(first_byte);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_opf = REQ_OP_WRITE | write_flags;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
@ -360,7 +383,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
|
||||
bio_endio(bio);
|
||||
}
|
||||
|
||||
bio = btrfs_bio_alloc(bdev, first_byte);
|
||||
bio = btrfs_bio_alloc(first_byte);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_opf = REQ_OP_WRITE | write_flags;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = end_compressed_bio_write;
|
||||
@ -536,7 +560,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
struct extent_map *em;
|
||||
blk_status_t ret = BLK_STS_RESOURCE;
|
||||
int faili = 0;
|
||||
u32 *sums;
|
||||
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
u8 *sums;
|
||||
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
||||
@ -558,7 +583,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
cb->errors = 0;
|
||||
cb->inode = inode;
|
||||
cb->mirror_num = mirror_num;
|
||||
sums = &cb->sums;
|
||||
sums = cb->sums;
|
||||
|
||||
cb->start = em->orig_start;
|
||||
em_len = em->len;
|
||||
@ -597,7 +622,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
/* include any pages we added in add_ra-bio_pages */
|
||||
cb->len = bio->bi_iter.bi_size;
|
||||
|
||||
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
|
||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
||||
bio_set_dev(comp_bio, bdev);
|
||||
comp_bio->bi_opf = REQ_OP_READ;
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
@ -617,6 +643,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
page->mapping = NULL;
|
||||
if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
|
||||
PAGE_SIZE) {
|
||||
unsigned int nr_sectors;
|
||||
|
||||
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
|
||||
BTRFS_WQ_ENDIO_DATA);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
@ -634,8 +662,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
sums);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
sums += DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
||||
fs_info->sectorsize);
|
||||
|
||||
nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
|
||||
fs_info->sectorsize);
|
||||
sums += csum_size * nr_sectors;
|
||||
|
||||
ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
|
||||
if (ret) {
|
||||
@ -643,7 +673,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
bio_endio(comp_bio);
|
||||
}
|
||||
|
||||
comp_bio = btrfs_bio_alloc(bdev, cur_disk_byte);
|
||||
comp_bio = btrfs_bio_alloc(cur_disk_byte);
|
||||
bio_set_dev(comp_bio, bdev);
|
||||
comp_bio->bi_opf = REQ_OP_READ;
|
||||
comp_bio->bi_private = cb;
|
||||
comp_bio->bi_end_io = end_compressed_bio_read;
|
||||
|
@ -61,7 +61,7 @@ struct compressed_bio {
|
||||
* the start of a variable length array of checksums only
|
||||
* used by reads
|
||||
*/
|
||||
u32 sums;
|
||||
u8 sums[];
|
||||
};
|
||||
|
||||
static inline unsigned int btrfs_compress_type(unsigned int type_level)
|
||||
@ -173,6 +173,7 @@ extern const struct btrfs_compress_op btrfs_lzo_compress;
|
||||
extern const struct btrfs_compress_op btrfs_zstd_compress;
|
||||
|
||||
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
|
||||
bool btrfs_compress_is_valid_type(const char *str, size_t len);
|
||||
|
||||
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
|
||||
|
||||
|
282
fs/btrfs/ctree.h
282
fs/btrfs/ctree.h
@ -19,6 +19,7 @@
|
||||
#include <linux/kobject.h>
|
||||
#include <trace/events/btrfs.h>
|
||||
#include <asm/kmap_types.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/btrfs.h>
|
||||
#include <linux/btrfs_tree.h>
|
||||
@ -31,11 +32,13 @@
|
||||
#include "extent_io.h"
|
||||
#include "extent_map.h"
|
||||
#include "async-thread.h"
|
||||
#include "block-rsv.h"
|
||||
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_transaction;
|
||||
struct btrfs_pending_snapshot;
|
||||
struct btrfs_delayed_ref_root;
|
||||
struct btrfs_space_info;
|
||||
extern struct kmem_cache *btrfs_trans_handle_cachep;
|
||||
extern struct kmem_cache *btrfs_bit_radix_cachep;
|
||||
extern struct kmem_cache *btrfs_path_cachep;
|
||||
@ -45,7 +48,16 @@ struct btrfs_ref;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
#define BTRFS_MAX_MIRRORS 3
|
||||
/*
|
||||
* Maximum number of mirrors that can be available for all profiles counting
|
||||
* the target device of dev-replace as one. During an active device replace
|
||||
* procedure, the target device of the copy operation is a mirror for the
|
||||
* filesystem data as well that can be used to read data in order to repair
|
||||
* read errors on other disks.
|
||||
*
|
||||
* Current value is derived from RAID1 with 2 copies.
|
||||
*/
|
||||
#define BTRFS_MAX_MIRRORS (2 + 1)
|
||||
|
||||
#define BTRFS_MAX_LEVEL 8
|
||||
|
||||
@ -72,6 +84,7 @@ struct btrfs_ref;
|
||||
|
||||
/* four bytes for CRC32 */
|
||||
static const int btrfs_csum_sizes[] = { 4 };
|
||||
static const char *btrfs_csum_names[] = { "crc32c" };
|
||||
|
||||
#define BTRFS_EMPTY_DIR_SIZE 0
|
||||
|
||||
@ -99,10 +112,6 @@ static inline u32 count_max_extents(u64 size)
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
}
|
||||
|
||||
struct btrfs_mapping_tree {
|
||||
struct extent_map_tree map_tree;
|
||||
};
|
||||
|
||||
static inline unsigned long btrfs_chunk_item_size(int num_stripes)
|
||||
{
|
||||
BUG_ON(num_stripes == 0);
|
||||
@ -395,115 +404,6 @@ struct raid_kobject {
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct btrfs_space_info {
|
||||
spinlock_t lock;
|
||||
|
||||
u64 total_bytes; /* total bytes in the space,
|
||||
this doesn't take mirrors into account */
|
||||
u64 bytes_used; /* total bytes used,
|
||||
this doesn't take mirrors into account */
|
||||
u64 bytes_pinned; /* total bytes pinned, will be freed when the
|
||||
transaction finishes */
|
||||
u64 bytes_reserved; /* total bytes the allocator has reserved for
|
||||
current allocations */
|
||||
u64 bytes_may_use; /* number of bytes that may be used for
|
||||
delalloc/allocations */
|
||||
u64 bytes_readonly; /* total bytes that are read only */
|
||||
|
||||
u64 max_extent_size; /* This will hold the maximum extent size of
|
||||
the space info if we had an ENOSPC in the
|
||||
allocator. */
|
||||
|
||||
unsigned int full:1; /* indicates that we cannot allocate any more
|
||||
chunks for this space */
|
||||
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
|
||||
|
||||
unsigned int flush:1; /* set if we are trying to make space */
|
||||
|
||||
unsigned int force_alloc; /* set if we need to force a chunk
|
||||
alloc for this space */
|
||||
|
||||
u64 disk_used; /* total bytes used on disk */
|
||||
u64 disk_total; /* total bytes on disk, takes mirrors into
|
||||
account */
|
||||
|
||||
u64 flags;
|
||||
|
||||
/*
|
||||
* bytes_pinned is kept in line with what is actually pinned, as in
|
||||
* we've called update_block_group and dropped the bytes_used counter
|
||||
* and increased the bytes_pinned counter. However this means that
|
||||
* bytes_pinned does not reflect the bytes that will be pinned once the
|
||||
* delayed refs are flushed, so this counter is inc'ed every time we
|
||||
* call btrfs_free_extent so it is a realtime count of what will be
|
||||
* freed once the transaction is committed. It will be zeroed every
|
||||
* time the transaction commits.
|
||||
*/
|
||||
struct percpu_counter total_bytes_pinned;
|
||||
|
||||
struct list_head list;
|
||||
/* Protected by the spinlock 'lock'. */
|
||||
struct list_head ro_bgs;
|
||||
struct list_head priority_tickets;
|
||||
struct list_head tickets;
|
||||
/*
|
||||
* tickets_id just indicates the next ticket will be handled, so note
|
||||
* it's not stored per ticket.
|
||||
*/
|
||||
u64 tickets_id;
|
||||
|
||||
struct rw_semaphore groups_sem;
|
||||
/* for block groups in our same type */
|
||||
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
|
||||
wait_queue_head_t wait;
|
||||
|
||||
struct kobject kobj;
|
||||
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
|
||||
};
|
||||
|
||||
/*
|
||||
* Types of block reserves
|
||||
*/
|
||||
enum {
|
||||
BTRFS_BLOCK_RSV_GLOBAL,
|
||||
BTRFS_BLOCK_RSV_DELALLOC,
|
||||
BTRFS_BLOCK_RSV_TRANS,
|
||||
BTRFS_BLOCK_RSV_CHUNK,
|
||||
BTRFS_BLOCK_RSV_DELOPS,
|
||||
BTRFS_BLOCK_RSV_DELREFS,
|
||||
BTRFS_BLOCK_RSV_EMPTY,
|
||||
BTRFS_BLOCK_RSV_TEMP,
|
||||
};
|
||||
|
||||
struct btrfs_block_rsv {
|
||||
u64 size;
|
||||
u64 reserved;
|
||||
struct btrfs_space_info *space_info;
|
||||
spinlock_t lock;
|
||||
unsigned short full;
|
||||
unsigned short type;
|
||||
unsigned short failfast;
|
||||
|
||||
/*
|
||||
* Qgroup equivalent for @size @reserved
|
||||
*
|
||||
* Unlike normal @size/@reserved for inode rsv, qgroup doesn't care
|
||||
* about things like csum size nor how many tree blocks it will need to
|
||||
* reserve.
|
||||
*
|
||||
* Qgroup cares more about net change of the extent usage.
|
||||
*
|
||||
* So for one newly inserted file extent, in worst case it will cause
|
||||
* leaf split and level increase, nodesize for each file extent is
|
||||
* already too much.
|
||||
*
|
||||
* In short, qgroup_size/reserved is the upper limit of possible needed
|
||||
* qgroup metadata reservation.
|
||||
*/
|
||||
u64 qgroup_rsv_size;
|
||||
u64 qgroup_rsv_reserved;
|
||||
};
|
||||
|
||||
/*
|
||||
* free clusters are used to claim free space in relatively large chunks,
|
||||
* allowing us to do less seeky writes. They are used for all metadata
|
||||
@ -786,11 +686,18 @@ enum {
|
||||
/*
|
||||
* Indicate that balance has been set up from the ioctl and is in the
|
||||
* main phase. The fs_info::balance_ctl is initialized.
|
||||
* Set and cleared while holding fs_info::balance_mutex.
|
||||
*/
|
||||
BTRFS_FS_BALANCE_RUNNING,
|
||||
|
||||
/* Indicate that the cleaner thread is awake and doing something. */
|
||||
BTRFS_FS_CLEANER_RUNNING,
|
||||
|
||||
/*
|
||||
* The checksumming has an optimized version and is considered fast,
|
||||
* so we don't need to offload checksums to workqueues.
|
||||
*/
|
||||
BTRFS_FS_CSUM_IMPL_FAST,
|
||||
};
|
||||
|
||||
struct btrfs_fs_info {
|
||||
@ -824,7 +731,7 @@ struct btrfs_fs_info {
|
||||
struct extent_io_tree *pinned_extents;
|
||||
|
||||
/* logical->physical extent mapping */
|
||||
struct btrfs_mapping_tree mapping_tree;
|
||||
struct extent_map_tree mapping_tree;
|
||||
|
||||
/*
|
||||
* block reservation for extent, checksum, root tree and
|
||||
@ -1160,6 +1067,14 @@ struct btrfs_fs_info {
|
||||
spinlock_t swapfile_pins_lock;
|
||||
struct rb_root swapfile_pins;
|
||||
|
||||
struct crypto_shash *csum_shash;
|
||||
|
||||
/*
|
||||
* Number of send operations in progress.
|
||||
* Updated while holding fs_info::balance_mutex.
|
||||
*/
|
||||
int send_in_progress;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
struct rb_root block_tree;
|
||||
@ -2451,6 +2366,11 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s)
|
||||
return btrfs_csum_sizes[t];
|
||||
}
|
||||
|
||||
static inline const char *btrfs_super_csum_name(u16 csum_type)
|
||||
{
|
||||
/* csum type is validated at mount time */
|
||||
return btrfs_csum_names[csum_type];
|
||||
}
|
||||
|
||||
/*
|
||||
* The leaf data grows from end-to-front in the node.
|
||||
@ -2642,6 +2562,16 @@ BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right,
|
||||
((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \
|
||||
btrfs_item_offset_nr(leaf, slot)))
|
||||
|
||||
static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length)
|
||||
{
|
||||
return crc32c(crc, address, length);
|
||||
}
|
||||
|
||||
static inline void btrfs_crc32c_final(u32 crc, u8 *result)
|
||||
{
|
||||
put_unaligned_le32(~crc, result);
|
||||
}
|
||||
|
||||
static inline u64 btrfs_name_hash(const char *name, int len)
|
||||
{
|
||||
return crc32c((u32)~1, name, len);
|
||||
@ -2656,12 +2586,6 @@ static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
|
||||
return (u64) crc32c(parent_objectid, name, len);
|
||||
}
|
||||
|
||||
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
|
||||
{
|
||||
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
|
||||
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
|
||||
}
|
||||
|
||||
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
|
||||
{
|
||||
return mapping_gfp_constraint(mapping, ~__GFP_FS);
|
||||
@ -2698,8 +2622,6 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
|
||||
}
|
||||
|
||||
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
|
||||
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
|
||||
const u64 start);
|
||||
void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
|
||||
@ -2814,17 +2736,28 @@ enum btrfs_flush_state {
|
||||
COMMIT_TRANS = 9,
|
||||
};
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
|
||||
int btrfs_check_data_free_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
void btrfs_free_reserved_data_space(struct inode *inode,
|
||||
struct extent_changeset *reserved, u64 start, u64 len);
|
||||
void btrfs_delalloc_release_space(struct inode *inode,
|
||||
struct extent_changeset *reserved,
|
||||
u64 start, u64 len, bool qgroup_free);
|
||||
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
|
||||
u64 len);
|
||||
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
|
||||
/*
|
||||
* control flags for do_chunk_alloc's force field
|
||||
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
|
||||
* if we really need one.
|
||||
*
|
||||
* CHUNK_ALLOC_LIMITED means to only try and allocate one
|
||||
* if we have very few chunks already allocated. This is
|
||||
* used as part of the clustering code to help make sure
|
||||
* we have a good pool of storage to cluster in, without
|
||||
* filling the FS with empty chunks
|
||||
*
|
||||
* CHUNK_ALLOC_FORCE means it must try to allocate one
|
||||
*
|
||||
*/
|
||||
enum btrfs_chunk_alloc_enum {
|
||||
CHUNK_ALLOC_NO_FORCE,
|
||||
CHUNK_ALLOC_LIMITED,
|
||||
CHUNK_ALLOC_FORCE,
|
||||
};
|
||||
|
||||
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
|
||||
enum btrfs_chunk_alloc_enum force);
|
||||
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
int nitems, bool use_global_rsv);
|
||||
@ -2834,41 +2767,6 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
|
||||
bool qgroup_free);
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
|
||||
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
bool qgroup_free);
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type);
|
||||
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
unsigned short type);
|
||||
void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv,
|
||||
unsigned short type);
|
||||
void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv);
|
||||
int btrfs_block_rsv_add(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
int btrfs_block_rsv_check(struct btrfs_block_rsv *block_rsv, int min_factor);
|
||||
int btrfs_block_rsv_refill(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, u64 min_reserved,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
|
||||
struct btrfs_block_rsv *dst_rsv, u64 num_bytes,
|
||||
bool update_size);
|
||||
int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
int min_factor);
|
||||
void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes);
|
||||
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
|
||||
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
|
||||
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *src,
|
||||
u64 num_bytes);
|
||||
int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
|
||||
void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
|
||||
@ -3186,7 +3084,8 @@ int btrfs_find_name_in_ext_backref(struct extent_buffer *leaf, int slot,
|
||||
struct btrfs_dio_private;
|
||||
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst);
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u8 *dst);
|
||||
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
|
||||
u64 logical_offset);
|
||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
@ -3514,8 +3413,7 @@ __cold
|
||||
static inline void assfail(const char *expr, const char *file, int line)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
|
||||
pr_err("assertion failed: %s, file: %s, line: %d\n",
|
||||
expr, file, line);
|
||||
pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
@ -3599,10 +3497,11 @@ do { \
|
||||
/* compatibility and incompatibility defines */
|
||||
|
||||
#define btrfs_set_fs_incompat(__fs_info, opt) \
|
||||
__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
|
||||
__btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
|
||||
#opt)
|
||||
|
||||
static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
|
||||
u64 flag)
|
||||
u64 flag, const char* name)
|
||||
{
|
||||
struct btrfs_super_block *disk_super;
|
||||
u64 features;
|
||||
@ -3615,18 +3514,20 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
|
||||
if (!(features & flag)) {
|
||||
features |= flag;
|
||||
btrfs_set_super_incompat_flags(disk_super, features);
|
||||
btrfs_info(fs_info, "setting %llu feature flag",
|
||||
flag);
|
||||
btrfs_info(fs_info,
|
||||
"setting incompat feature flag for %s (0x%llx)",
|
||||
name, flag);
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
}
|
||||
|
||||
#define btrfs_clear_fs_incompat(__fs_info, opt) \
|
||||
__btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
|
||||
__btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \
|
||||
#opt)
|
||||
|
||||
static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
|
||||
u64 flag)
|
||||
u64 flag, const char* name)
|
||||
{
|
||||
struct btrfs_super_block *disk_super;
|
||||
u64 features;
|
||||
@ -3639,8 +3540,9 @@ static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
|
||||
if (features & flag) {
|
||||
features &= ~flag;
|
||||
btrfs_set_super_incompat_flags(disk_super, features);
|
||||
btrfs_info(fs_info, "clearing %llu feature flag",
|
||||
flag);
|
||||
btrfs_info(fs_info,
|
||||
"clearing incompat feature flag for %s (0x%llx)",
|
||||
name, flag);
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
@ -3657,10 +3559,11 @@ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
|
||||
}
|
||||
|
||||
#define btrfs_set_fs_compat_ro(__fs_info, opt) \
|
||||
__btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
|
||||
__btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
|
||||
#opt)
|
||||
|
||||
static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
|
||||
u64 flag)
|
||||
u64 flag, const char *name)
|
||||
{
|
||||
struct btrfs_super_block *disk_super;
|
||||
u64 features;
|
||||
@ -3673,18 +3576,20 @@ static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
|
||||
if (!(features & flag)) {
|
||||
features |= flag;
|
||||
btrfs_set_super_compat_ro_flags(disk_super, features);
|
||||
btrfs_info(fs_info, "setting %llu ro feature flag",
|
||||
flag);
|
||||
btrfs_info(fs_info,
|
||||
"setting compat-ro feature flag for %s (0x%llx)",
|
||||
name, flag);
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
}
|
||||
|
||||
#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
|
||||
__btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
|
||||
__btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \
|
||||
#opt)
|
||||
|
||||
static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
|
||||
u64 flag)
|
||||
u64 flag, const char *name)
|
||||
{
|
||||
struct btrfs_super_block *disk_super;
|
||||
u64 features;
|
||||
@ -3697,8 +3602,9 @@ static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
|
||||
if (features & flag) {
|
||||
features &= ~flag;
|
||||
btrfs_set_super_compat_ro_flags(disk_super, features);
|
||||
btrfs_info(fs_info, "clearing %llu ro feature flag",
|
||||
flag);
|
||||
btrfs_info(fs_info,
|
||||
"clearing compat-ro feature flag for %s (0x%llx)",
|
||||
name, flag);
|
||||
}
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
}
|
||||
|
494
fs/btrfs/delalloc-space.c
Normal file
494
fs/btrfs/delalloc-space.c
Normal file
@ -0,0 +1,494 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "ctree.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "block-rsv.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "space-info.h"
|
||||
#include "transaction.h"
|
||||
#include "qgroup.h"
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_space_info *data_sinfo = fs_info->data_sinfo;
|
||||
u64 used;
|
||||
int ret = 0;
|
||||
int need_commit = 2;
|
||||
int have_pinned_space;
|
||||
|
||||
/* Make sure bytes are sectorsize aligned */
|
||||
bytes = ALIGN(bytes, fs_info->sectorsize);
|
||||
|
||||
if (btrfs_is_free_space_inode(inode)) {
|
||||
need_commit = 0;
|
||||
ASSERT(current->journal_info);
|
||||
}
|
||||
|
||||
again:
|
||||
/* Make sure we have enough space to handle the data first */
|
||||
spin_lock(&data_sinfo->lock);
|
||||
used = btrfs_space_info_used(data_sinfo, true);
|
||||
|
||||
if (used + bytes > data_sinfo->total_bytes) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
/*
|
||||
* If we don't have enough free bytes in this space then we need
|
||||
* to alloc a new chunk.
|
||||
*/
|
||||
if (!data_sinfo->full) {
|
||||
u64 alloc_target;
|
||||
|
||||
data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
alloc_target = btrfs_data_alloc_profile(fs_info);
|
||||
/*
|
||||
* It is ugly that we don't call nolock join
|
||||
* transaction for the free space inode case here.
|
||||
* But it is safe because we only do the data space
|
||||
* reservation for the free space cache in the
|
||||
* transaction context, the common join transaction
|
||||
* just increase the counter of the current transaction
|
||||
* handler, doesn't try to acquire the trans_lock of
|
||||
* the fs.
|
||||
*/
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
ret = btrfs_chunk_alloc(trans, alloc_target,
|
||||
CHUNK_ALLOC_NO_FORCE);
|
||||
btrfs_end_transaction(trans);
|
||||
if (ret < 0) {
|
||||
if (ret != -ENOSPC)
|
||||
return ret;
|
||||
else {
|
||||
have_pinned_space = 1;
|
||||
goto commit_trans;
|
||||
}
|
||||
}
|
||||
|
||||
goto again;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we don't have enough pinned space to deal with this
|
||||
* allocation, and no removed chunk in current transaction,
|
||||
* don't bother committing the transaction.
|
||||
*/
|
||||
have_pinned_space = __percpu_counter_compare(
|
||||
&data_sinfo->total_bytes_pinned,
|
||||
used + bytes - data_sinfo->total_bytes,
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH);
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
/* Commit the current transaction and try again */
|
||||
commit_trans:
|
||||
if (need_commit) {
|
||||
need_commit--;
|
||||
|
||||
if (need_commit > 0) {
|
||||
btrfs_start_delalloc_roots(fs_info, -1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0,
|
||||
(u64)-1);
|
||||
}
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
if (have_pinned_space >= 0 ||
|
||||
test_bit(BTRFS_TRANS_HAVE_FREE_BGS,
|
||||
&trans->transaction->flags) ||
|
||||
need_commit > 0) {
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret)
|
||||
return ret;
|
||||
/*
|
||||
* The cleaner kthread might still be doing iput
|
||||
* operations. Wait for it to finish so that
|
||||
* more space is released. We don't need to
|
||||
* explicitly run the delayed iputs here because
|
||||
* the commit_transaction would have woken up
|
||||
* the cleaner.
|
||||
*/
|
||||
ret = btrfs_wait_on_delayed_iputs(fs_info);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto again;
|
||||
} else {
|
||||
btrfs_end_transaction(trans);
|
||||
}
|
||||
}
|
||||
|
||||
trace_btrfs_space_reservation(fs_info,
|
||||
"space_info:enospc",
|
||||
data_sinfo->flags, bytes, 1);
|
||||
return -ENOSPC;
|
||||
}
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, bytes);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
data_sinfo->flags, bytes, 1);
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_check_data_free_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
int ret;
|
||||
|
||||
/* align the range */
|
||||
len = round_up(start + len, fs_info->sectorsize) -
|
||||
round_down(start, fs_info->sectorsize);
|
||||
start = round_down(start, fs_info->sectorsize);
|
||||
|
||||
ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode), len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
|
||||
ret = btrfs_qgroup_reserve_data(inode, reserved, start, len);
|
||||
if (ret < 0)
|
||||
btrfs_free_reserved_data_space_noquota(inode, start, len);
|
||||
else
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one will *NOT* use the accurate qgroup reserved space API, just for
 * the case where we can't sleep and are sure it won't affect the qgroup
 * reserved space, like clear_bit_hook().
 */
|
||||
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
|
||||
/* Make sure the range is aligned to sectorsize */
|
||||
len = round_up(start + len, fs_info->sectorsize) -
|
||||
round_down(start, fs_info->sectorsize);
|
||||
start = round_down(start, fs_info->sectorsize);
|
||||
|
||||
data_sinfo = fs_info->data_sinfo;
|
||||
spin_lock(&data_sinfo->lock);
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, data_sinfo, -len);
|
||||
trace_btrfs_space_reservation(fs_info, "space_info",
|
||||
data_sinfo->flags, len, 0);
|
||||
spin_unlock(&data_sinfo->lock);
|
||||
}
|
||||
|
||||
/*
 * Called if we need to clear a data reservation for this inode,
 * normally in an error case.
 *
 * This one will handle the per-inode data rsv map for the accurate
 * reserved space framework.
 */
|
||||
void btrfs_free_reserved_data_space(struct inode *inode,
|
||||
struct extent_changeset *reserved, u64 start, u64 len)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
/* Make sure the range is aligned to sectorsize */
|
||||
len = round_up(start + len, root->fs_info->sectorsize) -
|
||||
round_down(start, root->fs_info->sectorsize);
|
||||
start = round_down(start, root->fs_info->sectorsize);
|
||||
|
||||
btrfs_free_reserved_data_space_noquota(inode, start, len);
|
||||
btrfs_qgroup_free_data(inode, reserved, start, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_inode_rsv_release - release any excessive reservation.
|
||||
* @inode - the inode we need to release from.
|
||||
* @qgroup_free - free or convert qgroup meta.
|
||||
* Unlike normal operation, qgroup meta reservation needs to know if we are
|
||||
* freeing qgroup reservation or just converting it into per-trans. Normally
|
||||
* @qgroup_free is true for error handling, and false for normal release.
|
||||
*
|
||||
* This is the same as btrfs_block_rsv_release, except that it handles the
|
||||
* tracepoint for the reservation.
|
||||
*/
|
||||
static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
||||
u64 released = 0;
|
||||
u64 qgroup_to_release = 0;
|
||||
|
||||
/*
|
||||
* Since we statically set the block_rsv->size we just want to say we
|
||||
* are releasing 0 bytes, and then we'll just get the reservation over
|
||||
 * the size freed.
|
||||
*/
|
||||
released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
|
||||
&qgroup_to_release);
|
||||
if (released > 0)
|
||||
trace_btrfs_space_reservation(fs_info, "delalloc",
|
||||
btrfs_ino(inode), released, 0);
|
||||
if (qgroup_free)
|
||||
btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release);
|
||||
else
|
||||
btrfs_qgroup_convert_reserved_meta(inode->root,
|
||||
qgroup_to_release);
|
||||
}
|
||||
|
||||
static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_inode *inode)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
||||
u64 reserve_size = 0;
|
||||
u64 qgroup_rsv_size = 0;
|
||||
u64 csum_leaves;
|
||||
unsigned outstanding_extents;
|
||||
|
||||
lockdep_assert_held(&inode->lock);
|
||||
outstanding_extents = inode->outstanding_extents;
|
||||
if (outstanding_extents)
|
||||
reserve_size = btrfs_calc_trans_metadata_size(fs_info,
|
||||
outstanding_extents + 1);
|
||||
csum_leaves = btrfs_csum_bytes_to_leaves(fs_info,
|
||||
inode->csum_bytes);
|
||||
reserve_size += btrfs_calc_trans_metadata_size(fs_info,
|
||||
csum_leaves);
|
||||
/*
|
||||
* For qgroup rsv, the calculation is very simple:
|
||||
* account one nodesize for each outstanding extent
|
||||
*
|
||||
* This is overestimating in most cases.
|
||||
*/
|
||||
qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
block_rsv->size = reserve_size;
|
||||
block_rsv->qgroup_rsv_size = qgroup_rsv_size;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
}
|
||||
|
||||
static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
|
||||
u64 num_bytes, u64 *meta_reserve,
|
||||
u64 *qgroup_reserve)
|
||||
{
|
||||
u64 nr_extents = count_max_extents(num_bytes);
|
||||
u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
|
||||
|
||||
/* We add one for the inode update at finish ordered time */
|
||||
*meta_reserve = btrfs_calc_trans_metadata_size(fs_info,
|
||||
nr_extents + csum_leaves + 1);
|
||||
*qgroup_reserve = nr_extents * fs_info->nodesize;
|
||||
}
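
To put rough numbers on this helper (a back-of-the-envelope illustration only; the 4 KiB sectorsize, 16 KiB nodesize and 4-byte crc32c checksum size are assumed defaults, nothing this series changes):

/*
 * Worked example, not part of the patch: the reservation math for a
 * single 1 MiB buffered write under the assumed default geometry.
 *
 *   num_bytes      = 1 MiB
 *   nr_extents     = count_max_extents(1 MiB)          = 1
 *   csum bytes     = (1 MiB / 4 KiB) * 4 bytes         = 1 KiB -> 1 csum leaf
 *   meta_reserve   covers nr_extents + csum_leaves + 1 = 3 items
 *   qgroup_reserve = nr_extents * nodesize             = 16 KiB
 */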
|
||||
|
||||
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
|
||||
u64 meta_reserve, qgroup_reserve;
|
||||
unsigned nr_extents;
|
||||
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
|
||||
int ret = 0;
|
||||
bool delalloc_lock = true;
|
||||
|
||||
/*
|
||||
* If we are a free space inode we need to not flush since we will be in
|
||||
* the middle of a transaction commit. We also don't need the delalloc
|
||||
* mutex since we won't race with anybody. We need this mostly to make
|
||||
* lockdep shut its filthy mouth.
|
||||
*
|
||||
* If we have a transaction open (can happen if we call truncate_block
|
||||
* from truncate), then we need FLUSH_LIMIT so we don't deadlock.
|
||||
*/
|
||||
if (btrfs_is_free_space_inode(inode)) {
|
||||
flush = BTRFS_RESERVE_NO_FLUSH;
|
||||
delalloc_lock = false;
|
||||
} else {
|
||||
if (current->journal_info)
|
||||
flush = BTRFS_RESERVE_FLUSH_LIMIT;
|
||||
|
||||
if (btrfs_transaction_in_commit(fs_info))
|
||||
schedule_timeout(1);
|
||||
}
|
||||
|
||||
if (delalloc_lock)
|
||||
mutex_lock(&inode->delalloc_mutex);
|
||||
|
||||
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
|
||||
|
||||
/*
|
||||
* We always want to do it this way, every other way is wrong and ends
|
||||
* in tears. Pre-reserving the amount we are going to add will always
|
||||
* be the right way, because otherwise if we have enough parallelism we
|
||||
* could end up with thousands of inodes all holding little bits of
|
||||
* reservations they were able to make previously and the only way to
|
||||
* reclaim that space is to ENOSPC out the operations and clear
|
||||
* everything out and try again, which is bad. This way we just
|
||||
* over-reserve slightly, and clean up the mess when we are done.
|
||||
*/
|
||||
calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
|
||||
&qgroup_reserve);
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
|
||||
if (ret)
|
||||
goto out_fail;
|
||||
ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
|
||||
if (ret)
|
||||
goto out_qgroup;
|
||||
|
||||
/*
|
||||
* Now we need to update our outstanding extents and csum bytes _first_
|
||||
* and then add the reservation to the block_rsv. This keeps us from
|
||||
* racing with an ordered completion or some such that would think it
|
||||
* needs to free the reservation we just made.
|
||||
*/
|
||||
spin_lock(&inode->lock);
|
||||
nr_extents = count_max_extents(num_bytes);
|
||||
btrfs_mod_outstanding_extents(inode, nr_extents);
|
||||
inode->csum_bytes += num_bytes;
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
/* Now we can safely add our space to our block rsv */
|
||||
btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false);
|
||||
trace_btrfs_space_reservation(root->fs_info, "delalloc",
|
||||
btrfs_ino(inode), meta_reserve, 1);
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
block_rsv->qgroup_rsv_reserved += qgroup_reserve;
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
if (delalloc_lock)
|
||||
mutex_unlock(&inode->delalloc_mutex);
|
||||
return 0;
|
||||
out_qgroup:
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
|
||||
out_fail:
|
||||
btrfs_inode_rsv_release(inode, true);
|
||||
if (delalloc_lock)
|
||||
mutex_unlock(&inode->delalloc_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_delalloc_release_metadata - release a metadata reservation for an inode
|
||||
* @inode: the inode to release the reservation for.
|
||||
* @num_bytes: the number of bytes we are releasing.
|
||||
* @qgroup_free: free qgroup reservation or convert it to per-trans reservation
|
||||
*
|
||||
* This will release the metadata reservation for an inode. This can be called
|
||||
* once we complete IO for a given set of bytes to release their metadata
|
||||
* reservations, or on error for the same reason.
|
||||
*/
|
||||
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
bool qgroup_free)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
|
||||
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
|
||||
spin_lock(&inode->lock);
|
||||
inode->csum_bytes -= num_bytes;
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
if (btrfs_is_testing(fs_info))
|
||||
return;
|
||||
|
||||
btrfs_inode_rsv_release(inode, qgroup_free);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_delalloc_release_extents - release our outstanding_extents
|
||||
* @inode: the inode to balance the reservation for.
|
||||
* @num_bytes: the number of bytes we originally reserved with
|
||||
* @qgroup_free: do we need to free qgroup meta reservation or convert them.
|
||||
*
|
||||
* When we reserve space we increase outstanding_extents for the extents we may
|
||||
* add. Once we've set the range as delalloc or created our ordered extents we
|
||||
* have outstanding_extents to track the real usage, so we use this to free our
|
||||
* temporarily tracked outstanding_extents. This _must_ be used in conjunction
|
||||
* with btrfs_delalloc_reserve_metadata.
|
||||
*/
|
||||
void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
|
||||
bool qgroup_free)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
unsigned num_extents;
|
||||
|
||||
spin_lock(&inode->lock);
|
||||
num_extents = count_max_extents(num_bytes);
|
||||
btrfs_mod_outstanding_extents(inode, -num_extents);
|
||||
btrfs_calculate_inode_block_rsv_size(fs_info, inode);
|
||||
spin_unlock(&inode->lock);
|
||||
|
||||
if (btrfs_is_testing(fs_info))
|
||||
return;
|
||||
|
||||
btrfs_inode_rsv_release(inode, qgroup_free);
|
||||
}
|
||||
|
||||
/**
 * btrfs_delalloc_reserve_space - reserve data and metadata space for
 * delalloc
 * @inode: inode we're writing to
 * @start: start of the range we are writing to
 * @len: length of the range we are writing to
 * @reserved: mandatory parameter, records the actually reserved qgroup
 *            ranges of the current reservation
 *
 * This will do the following things:
 *
 * - reserve space in the data space info for num bytes and reserve the
 *   corresponding qgroup space
 *   (done in check_data_free_space)
 *
 * - reserve space for metadata, based on the number of outstanding
 *   extents and how many csums will be needed; also reserve metadata
 *   space in a per-root over-reserve method
 * - add to the inode's delalloc_bytes
 * - add it to the fs_info's delalloc inodes list
 *   (the above 3 are all done in delalloc_reserve_metadata)
 *
 * Return 0 for success
 * Return <0 for error (-ENOSPC or -EDQUOT)
 */
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = btrfs_check_data_free_space(inode, reserved, start, len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), len);
|
||||
if (ret < 0)
|
||||
btrfs_free_reserved_data_space(inode, *reserved, start, len);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * btrfs_delalloc_release_space - release data and metadata space for delalloc
 * @inode: inode we're releasing space for
 * @start: start position of the space already reserved
 * @len: length of the space already reserved
 * @qgroup_free: free the qgroup reservation or convert it to per-trans
 *
 * This function will release the metadata space that was not used and will
 * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes
 * list if there are no delalloc bytes left.
 * Also it will handle the qgroup reserved space.
 */
|
||||
void btrfs_delalloc_release_space(struct inode *inode,
|
||||
struct extent_changeset *reserved,
|
||||
u64 start, u64 len, bool qgroup_free)
|
||||
{
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode), len, qgroup_free);
|
||||
btrfs_free_reserved_data_space(inode, reserved, start, len);
|
||||
}
|
23
fs/btrfs/delalloc-space.h
Normal file
@ -0,0 +1,23 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_DELALLOC_SPACE_H
|
||||
#define BTRFS_DELALLOC_SPACE_H
|
||||
|
||||
struct extent_changeset;
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
|
||||
int btrfs_check_data_free_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
void btrfs_free_reserved_data_space(struct inode *inode,
|
||||
struct extent_changeset *reserved, u64 start, u64 len);
|
||||
void btrfs_delalloc_release_space(struct inode *inode,
|
||||
struct extent_changeset *reserved,
|
||||
u64 start, u64 len, bool qgroup_free);
|
||||
void btrfs_free_reserved_data_space_noquota(struct inode *inode, u64 start,
|
||||
u64 len);
|
||||
void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
|
||||
bool qgroup_free);
|
||||
int btrfs_delalloc_reserve_space(struct inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
|
||||
#endif /* BTRFS_DELALLOC_SPACE_H */
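
The header only declares the API; as a rough sketch of how the data and metadata halves pair up in a caller (illustration only, not part of this diff — example_do_write() and the surrounding function are invented for the example):

/* Illustrative sketch; assumes only the declarations from delalloc-space.h. */
static int example_do_write(struct inode *inode, u64 pos, u64 nbytes)
{
	/* Hypothetical helper standing in for the real write-path work. */
	return 0;
}

static int example_reserve_and_release(struct inode *inode, u64 pos, u64 nbytes)
{
	struct extent_changeset *data_reserved = NULL;
	int ret;

	/* Reserves data space, qgroup data space and delalloc metadata. */
	ret = btrfs_delalloc_reserve_space(inode, &data_reserved, pos, nbytes);
	if (ret < 0)
		return ret;

	ret = example_do_write(inode, pos, nbytes);
	if (ret < 0)
		/*
		 * Hand the whole reservation back; passing true frees the
		 * qgroup meta reservation instead of converting it.
		 */
		btrfs_delalloc_release_space(inode, data_reserved, pos,
					     nbytes, true);

	extent_changeset_free(data_reserved);
	return ret;
}

On success the reservation simply stays held and is consumed later, when the ordered extents complete; only the error path hands it back immediately.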
|
@ -10,6 +10,7 @@
|
||||
#include "delayed-ref.h"
|
||||
#include "transaction.h"
|
||||
#include "qgroup.h"
|
||||
#include "space-info.h"
|
||||
|
||||
struct kmem_cache *btrfs_delayed_ref_head_cachep;
|
||||
struct kmem_cache *btrfs_delayed_tree_ref_cachep;
|
||||
@ -24,6 +25,179 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
|
||||
* of hammering updates on the extent allocation tree.
|
||||
*/
|
||||
|
||||
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
bool ret = false;
|
||||
u64 reserved;
|
||||
|
||||
spin_lock(&global_rsv->lock);
|
||||
reserved = global_rsv->reserved;
|
||||
spin_unlock(&global_rsv->lock);
|
||||
|
||||
/*
|
||||
* Since the global reserve is just kind of magic we don't really want
|
||||
* to rely on it to save our bacon, so if our size is more than the
|
||||
* delayed_refs_rsv and the global rsv then it's time to think about
|
||||
* bailing.
|
||||
*/
|
||||
spin_lock(&delayed_refs_rsv->lock);
|
||||
reserved += delayed_refs_rsv->reserved;
|
||||
if (delayed_refs_rsv->size >= reserved)
|
||||
ret = true;
|
||||
spin_unlock(&delayed_refs_rsv->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
u64 num_entries =
|
||||
atomic_read(&trans->transaction->delayed_refs.num_entries);
|
||||
u64 avg_runtime;
|
||||
u64 val;
|
||||
|
||||
smp_mb();
|
||||
avg_runtime = trans->fs_info->avg_delayed_ref_runtime;
|
||||
val = num_entries * avg_runtime;
|
||||
if (val >= NSEC_PER_SEC)
|
||||
return 1;
|
||||
if (val >= NSEC_PER_SEC / 2)
|
||||
return 2;
|
||||
|
||||
return btrfs_check_space_for_delayed_refs(trans->fs_info);
|
||||
}
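
To make the two thresholds concrete (illustration only; the 100 us figure is an assumed average, the real avg_delayed_ref_runtime is measured at runtime):

/*
 * Worked example, not part of the patch.  With an assumed average delayed
 * ref runtime of 100 us (100,000 ns):
 *
 *   num_entries * 100,000 >= NSEC_PER_SEC      -> >= 10,000 entries: return 1
 *   num_entries * 100,000 >= NSEC_PER_SEC / 2  -> >=  5,000 entries: return 2
 *
 * Below both thresholds the decision falls back to whether the delayed
 * refs rsv looks under-reserved, via btrfs_check_space_for_delayed_refs().
 */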
|
||||
|
||||
/**
|
||||
* btrfs_delayed_refs_rsv_release - release a ref head's reservation.
|
||||
* @fs_info - the fs_info for our fs.
|
||||
* @nr - the number of items to drop.
|
||||
*
|
||||
* This drops the delayed ref head's count from the delayed refs rsv and frees
|
||||
* any excess reservation we had.
|
||||
*/
|
||||
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
|
||||
u64 num_bytes = btrfs_calc_trans_metadata_size(fs_info, nr);
|
||||
u64 released = 0;
|
||||
|
||||
released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
|
||||
NULL);
|
||||
if (released)
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
|
||||
0, released, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_update_delayed_refs_rsv - adjust the size of the delayed refs rsv
|
||||
* @trans - the trans that may have generated delayed refs
|
||||
*
|
||||
 * This is to be called any time we may have adjusted trans->delayed_ref_updates;
 * it will calculate the additional size and add it to the delayed_refs_rsv.
|
||||
*/
|
||||
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
|
||||
u64 num_bytes;
|
||||
|
||||
if (!trans->delayed_ref_updates)
|
||||
return;
|
||||
|
||||
num_bytes = btrfs_calc_trans_metadata_size(fs_info,
|
||||
trans->delayed_ref_updates);
|
||||
spin_lock(&delayed_rsv->lock);
|
||||
delayed_rsv->size += num_bytes;
|
||||
delayed_rsv->full = 0;
|
||||
spin_unlock(&delayed_rsv->lock);
|
||||
trans->delayed_ref_updates = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_migrate_to_delayed_refs_rsv - transfer bytes to our delayed refs rsv.
|
||||
* @fs_info - the fs info for our fs.
|
||||
* @src - the source block rsv to transfer from.
|
||||
* @num_bytes - the number of bytes to transfer.
|
||||
*
|
||||
* This transfers up to the num_bytes amount from the src rsv to the
|
||||
* delayed_refs_rsv. Any extra bytes are returned to the space info.
|
||||
*/
|
||||
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *src,
|
||||
u64 num_bytes)
|
||||
{
|
||||
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
|
||||
u64 to_free = 0;
|
||||
|
||||
spin_lock(&src->lock);
|
||||
src->reserved -= num_bytes;
|
||||
src->size -= num_bytes;
|
||||
spin_unlock(&src->lock);
|
||||
|
||||
spin_lock(&delayed_refs_rsv->lock);
|
||||
if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
|
||||
u64 delta = delayed_refs_rsv->size -
|
||||
delayed_refs_rsv->reserved;
|
||||
if (num_bytes > delta) {
|
||||
to_free = num_bytes - delta;
|
||||
num_bytes = delta;
|
||||
}
|
||||
} else {
|
||||
to_free = num_bytes;
|
||||
num_bytes = 0;
|
||||
}
|
||||
|
||||
if (num_bytes)
|
||||
delayed_refs_rsv->reserved += num_bytes;
|
||||
if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
|
||||
delayed_refs_rsv->full = 1;
|
||||
spin_unlock(&delayed_refs_rsv->lock);
|
||||
|
||||
if (num_bytes)
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
|
||||
0, num_bytes, 1);
|
||||
if (to_free)
|
||||
btrfs_space_info_add_old_bytes(fs_info,
|
||||
delayed_refs_rsv->space_info, to_free);
|
||||
}
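
A quick numeric illustration of the clamping above (the byte counts are invented for the example):

/*
 * Example only: if delayed_refs_rsv has size 96K but only 64K reserved
 * (delta = 32K) and the caller migrates 48K from 'src', then 32K tops up
 * delayed_refs_rsv->reserved and the remaining 16K (to_free) goes back to
 * the space info via btrfs_space_info_add_old_bytes().
 */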
|
||||
|
||||
/**
|
||||
* btrfs_delayed_refs_rsv_refill - refill based on our delayed refs usage.
|
||||
* @fs_info - the fs_info for our fs.
|
||||
* @flush - control how we can flush for this reservation.
|
||||
*
|
||||
 * This will refill the delayed block_rsv up to 1 item's size worth of space and
|
||||
* will return -ENOSPC if we can't make the reservation.
|
||||
*/
|
||||
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
struct btrfs_block_rsv *block_rsv = &fs_info->delayed_refs_rsv;
|
||||
u64 limit = btrfs_calc_trans_metadata_size(fs_info, 1);
|
||||
u64 num_bytes = 0;
|
||||
int ret = -ENOSPC;
|
||||
|
||||
spin_lock(&block_rsv->lock);
|
||||
if (block_rsv->reserved < block_rsv->size) {
|
||||
num_bytes = block_rsv->size - block_rsv->reserved;
|
||||
num_bytes = min(num_bytes, limit);
|
||||
}
|
||||
spin_unlock(&block_rsv->lock);
|
||||
|
||||
if (!num_bytes)
|
||||
return 0;
|
||||
|
||||
ret = btrfs_reserve_metadata_bytes(fs_info->extent_root, block_rsv,
|
||||
num_bytes, flush);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_block_rsv_add_bytes(block_rsv, num_bytes, 0);
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
|
||||
0, num_bytes, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* compare two delayed tree backrefs with same bytenr and type
|
||||
*/
|
||||
@ -957,13 +1131,14 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
/*
|
||||
* this does a simple search for the head node for a given extent.
|
||||
* It must be called with the delayed ref spinlock held, and it returns
|
||||
 * the head node if any were found, or NULL if not.
|
||||
* This does a simple search for the head node for a given extent. Returns the
|
||||
* head node if found, or NULL if not.
|
||||
*/
|
||||
struct btrfs_delayed_ref_head *
|
||||
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
|
||||
{
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
|
||||
return find_ref_head(delayed_refs, bytenr, false);
|
||||
}
|
||||
|
||||
|
@ -364,6 +364,16 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
|
||||
|
||||
void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr);
|
||||
void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans);
|
||||
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *src,
|
||||
u64 num_bytes);
|
||||
int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans);
|
||||
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/*
|
||||
* helper functions to cast a node into its container
|
||||
*/
|
||||
|
@ -201,7 +201,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
return PTR_ERR(bdev);
|
||||
}
|
||||
|
||||
filemap_write_and_wait(bdev->bd_inode->i_mapping);
|
||||
sync_blockdev(bdev);
|
||||
|
||||
devices = &fs_info->fs_devices->devices;
|
||||
list_for_each_entry(device, devices, dev_list) {
|
||||
@ -237,7 +237,6 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
rcu_assign_pointer(device->name, name);
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
device->generation = 0;
|
||||
device->io_width = fs_info->sectorsize;
|
||||
@ -256,6 +255,8 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
|
||||
device->dev_stats_valid = 1;
|
||||
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
|
||||
device->fs_devices = fs_info->fs_devices;
|
||||
|
||||
mutex_lock(&fs_info->fs_devices->device_list_mutex);
|
||||
list_add(&device->dev_list, &fs_info->fs_devices->devices);
|
||||
fs_info->fs_devices->num_devices++;
|
||||
fs_info->fs_devices->open_devices++;
|
||||
@ -399,7 +400,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
int ret;
|
||||
struct btrfs_device *tgt_device = NULL;
|
||||
struct btrfs_device *src_device = NULL;
|
||||
bool need_unlock;
|
||||
|
||||
src_device = btrfs_find_device_by_devspec(fs_info, srcdevid,
|
||||
srcdev_name);
|
||||
@ -413,11 +413,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
return -ETXTBSY;
|
||||
}
|
||||
|
||||
ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
|
||||
src_device, &tgt_device);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Here we commit the transaction to make sure commit_total_bytes
|
||||
* of all the devices are updated.
|
||||
@ -431,7 +426,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
need_unlock = true;
|
||||
ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
|
||||
src_device, &tgt_device);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
down_write(&dev_replace->rwsem);
|
||||
switch (dev_replace->replace_state) {
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
|
||||
@ -442,11 +441,11 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
|
||||
ASSERT(0);
|
||||
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
|
||||
up_write(&dev_replace->rwsem);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
dev_replace->cont_reading_from_srcdev_mode = read_src;
|
||||
WARN_ON(!src_device);
|
||||
dev_replace->srcdev = src_device;
|
||||
dev_replace->tgtdev = tgt_device;
|
||||
|
||||
@ -471,7 +470,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
atomic64_set(&dev_replace->num_write_errors, 0);
|
||||
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
|
||||
up_write(&dev_replace->rwsem);
|
||||
need_unlock = false;
|
||||
|
||||
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
|
||||
if (ret)
|
||||
@ -479,16 +477,16 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
|
||||
/* force writing the updated state information to disk */
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
/* Commit dev_replace state and reserve 1 item for it. */
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
need_unlock = true;
|
||||
down_write(&dev_replace->rwsem);
|
||||
dev_replace->replace_state =
|
||||
BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
|
||||
dev_replace->srcdev = NULL;
|
||||
dev_replace->tgtdev = NULL;
|
||||
up_write(&dev_replace->rwsem);
|
||||
goto leave;
|
||||
}
|
||||
|
||||
@ -510,8 +508,6 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
|
||||
leave:
|
||||
if (need_unlock)
|
||||
up_write(&dev_replace->rwsem);
|
||||
btrfs_destroy_dev_replace_tgtdev(tgt_device);
|
||||
return ret;
|
||||
}
|
||||
@ -678,7 +674,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
btrfs_device_set_disk_total_bytes(tgt_device,
|
||||
src_device->disk_total_bytes);
|
||||
btrfs_device_set_bytes_used(tgt_device, src_device->bytes_used);
|
||||
tgt_device->commit_total_bytes = src_device->commit_total_bytes;
|
||||
tgt_device->commit_bytes_used = src_device->bytes_used;
|
||||
|
||||
btrfs_assign_next_active_device(src_device, tgt_device);
|
||||
@ -728,7 +723,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
|
||||
struct btrfs_device *srcdev,
|
||||
struct btrfs_device *tgtdev)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
u64 start = 0;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -40,10 +41,6 @@
|
||||
#include "tree-checker.h"
|
||||
#include "ref-verify.h"
|
||||
|
||||
#ifdef CONFIG_X86
|
||||
#include <asm/cpufeature.h>
|
||||
#endif
|
||||
|
||||
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
|
||||
BTRFS_HEADER_FLAG_RELOC |\
|
||||
BTRFS_SUPER_FLAG_ERROR |\
|
||||
@ -249,16 +246,6 @@ out:
|
||||
return em;
|
||||
}
|
||||
|
||||
u32 btrfs_csum_data(const char *data, u32 seed, size_t len)
|
||||
{
|
||||
return crc32c(seed, data, len);
|
||||
}
|
||||
|
||||
void btrfs_csum_final(u32 crc, u8 *result)
|
||||
{
|
||||
put_unaligned_le32(~crc, result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the csum of a btree block and store the result to provided buffer.
|
||||
*
|
||||
@ -266,6 +253,8 @@ void btrfs_csum_final(u32 crc, u8 *result)
|
||||
*/
|
||||
static int csum_tree_block(struct extent_buffer *buf, u8 *result)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = buf->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
unsigned long len;
|
||||
unsigned long cur_len;
|
||||
unsigned long offset = BTRFS_CSUM_SIZE;
|
||||
@ -273,9 +262,12 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
|
||||
unsigned long map_start;
|
||||
unsigned long map_len;
|
||||
int err;
|
||||
u32 crc = ~(u32)0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
len = buf->len - offset;
|
||||
|
||||
while (len > 0) {
|
||||
/*
|
||||
* Note: we don't need to check for the err == 1 case here, as
|
||||
@ -288,14 +280,13 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result)
|
||||
if (WARN_ON(err))
|
||||
return err;
|
||||
cur_len = min(len, map_len - (offset - map_start));
|
||||
crc = btrfs_csum_data(kaddr + offset - map_start,
|
||||
crc, cur_len);
|
||||
crypto_shash_update(shash, kaddr + offset - map_start, cur_len);
|
||||
len -= cur_len;
|
||||
offset += cur_len;
|
||||
}
|
||||
memset(result, 0, BTRFS_CSUM_SIZE);
|
||||
|
||||
btrfs_csum_final(crc, result);
|
||||
crypto_shash_final(shash, result);
|
||||
|
||||
return 0;
|
||||
}
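
The init/update/final sequence above is the pattern every conversion in this series follows; pulled out on its own it looks like the sketch below (not a function from the patch — the data/len/result parameters are placeholders for whatever is being checksummed):

/*
 * Minimal sketch of the shash usage pattern, assuming fs_info->csum_shash
 * was set up by btrfs_init_csum_hash().
 */
static void example_csum(struct btrfs_fs_info *fs_info,
			 const u8 *data, size_t len, u8 *result)
{
	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);

	shash->tfm = fs_info->csum_shash;
	crypto_shash_init(shash);
	crypto_shash_update(shash, data, len);
	crypto_shash_final(shash, result);
}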
|
||||
@ -356,6 +347,16 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool btrfs_supported_super_csum(u16 csum_type)
|
||||
{
|
||||
switch (csum_type) {
|
||||
case BTRFS_CSUM_TYPE_CRC32:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return 0 if the superblock checksum type matches the checksum value of that
|
||||
* algorithm. Pass the raw disk superblock data.
|
||||
@ -365,33 +366,25 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
struct btrfs_super_block *disk_sb =
|
||||
(struct btrfs_super_block *)raw_disk_sb;
|
||||
u16 csum_type = btrfs_super_csum_type(disk_sb);
|
||||
int ret = 0;
|
||||
char result[BTRFS_CSUM_SIZE];
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
|
||||
if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
|
||||
u32 crc = ~(u32)0;
|
||||
char result[sizeof(crc)];
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
/*
|
||||
* The super_block structure does not span the whole
|
||||
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
|
||||
* is filled with zeros and is included in the checksum.
|
||||
*/
|
||||
crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
|
||||
crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
||||
btrfs_csum_final(crc, result);
|
||||
/*
|
||||
* The super_block structure does not span the whole
|
||||
* BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
|
||||
* filled with zeros and is included in the checksum.
|
||||
*/
|
||||
crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE,
|
||||
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
||||
crypto_shash_final(shash, result);
|
||||
|
||||
if (memcmp(raw_disk_sb, result, sizeof(result)))
|
||||
ret = 1;
|
||||
}
|
||||
if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb)))
|
||||
return 1;
|
||||
|
||||
if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
|
||||
btrfs_err(fs_info, "unsupported checksum algorithm %u",
|
||||
csum_type);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
||||
@ -873,14 +866,13 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio,
|
||||
return btree_csum_one_bio(bio);
|
||||
}
|
||||
|
||||
static int check_async_write(struct btrfs_inode *bi)
|
||||
static int check_async_write(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_inode *bi)
|
||||
{
|
||||
if (atomic_read(&bi->sync_writers))
|
||||
return 0;
|
||||
#ifdef CONFIG_X86
|
||||
if (static_cpu_has(X86_FEATURE_XMM4_2))
|
||||
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
|
||||
return 0;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -889,7 +881,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
|
||||
unsigned long bio_flags)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
int async = check_async_write(BTRFS_I(inode));
|
||||
int async = check_async_write(fs_info, BTRFS_I(inode));
|
||||
blk_status_t ret;
|
||||
|
||||
if (bio_op(bio) != REQ_OP_WRITE) {
|
||||
@ -2262,6 +2254,29 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
|
||||
{
|
||||
struct crypto_shash *csum_shash;
|
||||
const char *csum_name = btrfs_super_csum_name(csum_type);
|
||||
|
||||
csum_shash = crypto_alloc_shash(csum_name, 0, 0);
|
||||
|
||||
if (IS_ERR(csum_shash)) {
|
||||
btrfs_err(fs_info, "error allocating %s hash for checksum",
|
||||
csum_name);
|
||||
return PTR_ERR(csum_shash);
|
||||
}
|
||||
|
||||
fs_info->csum_shash = csum_shash;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
crypto_free_shash(fs_info->csum_shash);
|
||||
}
|
||||
|
||||
static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_fs_devices *fs_devices)
|
||||
{
|
||||
@ -2577,7 +2592,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
|
||||
ret = validate_super(fs_info, sb, -1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) {
|
||||
if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
|
||||
ret = -EUCLEAN;
|
||||
btrfs_err(fs_info, "invalid csum type, has %u want %u",
|
||||
btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
|
||||
@ -2607,6 +2622,7 @@ int open_ctree(struct super_block *sb,
|
||||
u32 stripesize;
|
||||
u64 generation;
|
||||
u64 features;
|
||||
u16 csum_type;
|
||||
struct btrfs_key location;
|
||||
struct buffer_head *bh;
|
||||
struct btrfs_super_block *disk_super;
|
||||
@ -2689,7 +2705,7 @@ int open_ctree(struct super_block *sb,
|
||||
INIT_LIST_HEAD(&fs_info->space_info);
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
INIT_LIST_HEAD(&fs_info->unused_bgs);
|
||||
btrfs_mapping_init(&fs_info->mapping_tree);
|
||||
extent_map_tree_init(&fs_info->mapping_tree);
|
||||
btrfs_init_block_rsv(&fs_info->global_block_rsv,
|
||||
BTRFS_BLOCK_RSV_GLOBAL);
|
||||
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
|
||||
@ -2793,6 +2809,8 @@ int open_ctree(struct super_block *sb,
|
||||
spin_lock_init(&fs_info->swapfile_pins_lock);
|
||||
fs_info->swapfile_pins = RB_ROOT;
|
||||
|
||||
fs_info->send_in_progress = 0;
|
||||
|
||||
ret = btrfs_alloc_stripe_hash_table(fs_info);
|
||||
if (ret) {
|
||||
err = ret;
|
||||
@ -2812,6 +2830,25 @@ int open_ctree(struct super_block *sb,
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
/*
 * Verify the type first; if that or the checksum value is corrupted,
 * we'll find out.
 */
|
||||
csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data);
|
||||
if (!btrfs_supported_super_csum(csum_type)) {
|
||||
btrfs_err(fs_info, "unsupported checksum algorithm: %u",
|
||||
csum_type);
|
||||
err = -EINVAL;
|
||||
brelse(bh);
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
ret = btrfs_init_csum_hash(fs_info, csum_type);
|
||||
if (ret) {
|
||||
err = ret;
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to check superblock checksum, the type is stored inside.
|
||||
* Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
|
||||
@ -2820,7 +2857,7 @@ int open_ctree(struct super_block *sb,
|
||||
btrfs_err(fs_info, "superblock checksum mismatch");
|
||||
err = -EINVAL;
|
||||
brelse(bh);
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2857,11 +2894,11 @@ int open_ctree(struct super_block *sb,
|
||||
if (ret) {
|
||||
btrfs_err(fs_info, "superblock contains fatal errors");
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
if (!btrfs_super_root(disk_super))
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
|
||||
/* check FS state, whether FS is broken. */
|
||||
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
|
||||
@ -2883,7 +2920,7 @@ int open_ctree(struct super_block *sb,
|
||||
ret = btrfs_parse_options(fs_info, options, sb->s_flags);
|
||||
if (ret) {
|
||||
err = ret;
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
features = btrfs_super_incompat_flags(disk_super) &
|
||||
@ -2893,7 +2930,7 @@ int open_ctree(struct super_block *sb,
|
||||
"cannot mount because of unsupported optional features (%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
features = btrfs_super_incompat_flags(disk_super);
|
||||
@ -2937,7 +2974,7 @@ int open_ctree(struct super_block *sb,
|
||||
btrfs_err(fs_info,
|
||||
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
|
||||
nodesize, sectorsize);
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2953,7 +2990,7 @@ int open_ctree(struct super_block *sb,
|
||||
"cannot mount read-write because of unsupported optional features (%llx)",
|
||||
features);
|
||||
err = -EINVAL;
|
||||
goto fail_alloc;
|
||||
goto fail_csum;
|
||||
}
|
||||
|
||||
ret = btrfs_init_workqueues(fs_info, fs_devices);
|
||||
@ -3331,6 +3368,8 @@ fail_tree_roots:
|
||||
fail_sb_buffer:
|
||||
btrfs_stop_all_workers(fs_info);
|
||||
btrfs_free_block_groups(fs_info);
|
||||
fail_csum:
|
||||
btrfs_free_csum_hash(fs_info);
|
||||
fail_alloc:
|
||||
fail_iput:
|
||||
btrfs_mapping_tree_free(&fs_info->mapping_tree);
|
||||
@ -3472,17 +3511,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
|
||||
static int write_dev_supers(struct btrfs_device *device,
|
||||
struct btrfs_super_block *sb, int max_mirrors)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = device->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
struct buffer_head *bh;
|
||||
int i;
|
||||
int ret;
|
||||
int errors = 0;
|
||||
u32 crc;
|
||||
u64 bytenr;
|
||||
int op_flags;
|
||||
|
||||
if (max_mirrors == 0)
|
||||
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
for (i = 0; i < max_mirrors; i++) {
|
||||
bytenr = btrfs_sb_offset(i);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
|
||||
@ -3491,10 +3533,10 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
|
||||
btrfs_set_super_bytenr(sb, bytenr);
|
||||
|
||||
crc = ~(u32)0;
|
||||
crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc,
|
||||
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
||||
btrfs_csum_final(crc, sb->csum);
|
||||
crypto_shash_init(shash);
|
||||
crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE,
|
||||
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
||||
crypto_shash_final(shash, sb->csum);
|
||||
|
||||
/* One reference for us, and we leave it for the caller */
|
||||
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
|
||||
@ -3709,7 +3751,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
|
||||
|
||||
if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
|
||||
(flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
|
||||
min_tolerated = min(min_tolerated,
|
||||
min_tolerated = min_t(int, min_tolerated,
|
||||
btrfs_raid_array[BTRFS_RAID_SINGLE].
|
||||
tolerated_failures);
|
||||
|
||||
@ -3718,7 +3760,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
|
||||
continue;
|
||||
if (!(flags & btrfs_raid_array[raid_type].bg_flag))
|
||||
continue;
|
||||
min_tolerated = min(min_tolerated,
|
||||
min_tolerated = min_t(int, min_tolerated,
|
||||
btrfs_raid_array[raid_type].
|
||||
tolerated_failures);
|
||||
}
|
||||
|
@ -115,8 +115,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level,
|
||||
struct btrfs_key *first_key);
|
||||
u32 btrfs_csum_data(const char *data, u32 seed, size_t len);
|
||||
void btrfs_csum_final(u32 crc, u8 *result);
|
||||
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
|
||||
enum btrfs_wq_endio_type metadata);
|
||||
blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
|
||||
|
File diff suppressed because it is too large
@ -359,6 +359,24 @@ do_insert:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
 * __etree_search - search @tree for an entry that contains @offset. Such
 * an entry would have entry->start <= offset && entry->end >= offset.
|
||||
*
|
||||
* @tree - the tree to search
|
||||
* @offset - offset that should fall within an entry in @tree
|
||||
* @next_ret - pointer to the first entry whose range ends after @offset
|
||||
* @prev - pointer to the first entry whose range begins before @offset
|
||||
* @p_ret - pointer where new node should be anchored (used when inserting an
|
||||
* entry in the tree)
|
||||
* @parent_ret - points to entry which would have been the parent of the entry,
|
||||
* containing @offset
|
||||
*
|
||||
* This function returns a pointer to the entry that contains @offset byte
|
||||
* address. If no such entry exists, then NULL is returned and the other
|
||||
* pointer arguments to the function are filled, otherwise the found entry is
|
||||
* returned and other pointers are left untouched.
|
||||
*/
|
||||
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
|
||||
struct rb_node **next_ret,
|
||||
struct rb_node **prev_ret,
|
||||
@ -504,9 +522,11 @@ static int insert_state(struct extent_io_tree *tree,
|
||||
{
|
||||
struct rb_node *node;
|
||||
|
||||
if (end < start)
|
||||
WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
|
||||
end, start);
|
||||
if (end < start) {
|
||||
btrfs_err(tree->fs_info,
|
||||
"insert state: end < start %llu %llu", end, start);
|
||||
WARN_ON(1);
|
||||
}
|
||||
state->start = start;
|
||||
state->end = end;
|
||||
|
||||
@ -516,7 +536,8 @@ static int insert_state(struct extent_io_tree *tree,
|
||||
if (node) {
|
||||
struct extent_state *found;
|
||||
found = rb_entry(node, struct extent_state, rb_node);
|
||||
pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
|
||||
btrfs_err(tree->fs_info,
|
||||
"found node %llu %llu on insert of %llu %llu",
|
||||
found->start, found->end, start, end);
|
||||
return -EEXIST;
|
||||
}
|
||||
@ -1537,8 +1558,8 @@ out:
|
||||
}
|
||||
|
||||
/**
|
||||
* find_first_clear_extent_bit - finds the first range that has @bits not set
|
||||
* and that starts after @start
|
||||
* find_first_clear_extent_bit - find the first range that has @bits not set.
|
||||
* This range could start before @start.
|
||||
*
|
||||
* @tree - the tree to search
|
||||
* @start - the offset at/after which the found extent should start
|
||||
@ -1578,12 +1599,52 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* At this point 'node' either contains 'start' or start is
|
||||
* before 'node'
|
||||
*/
|
||||
state = rb_entry(node, struct extent_state, rb_node);
|
||||
if (in_range(start, state->start, state->end - state->start + 1) &&
|
||||
(state->state & bits)) {
|
||||
start = state->end + 1;
|
||||
|
||||
if (in_range(start, state->start, state->end - state->start + 1)) {
|
||||
if (state->state & bits) {
|
||||
/*
|
||||
* |--range with bits sets--|
|
||||
* |
|
||||
* start
|
||||
*/
|
||||
start = state->end + 1;
|
||||
} else {
|
||||
/*
|
||||
* 'start' falls within a range that doesn't
|
||||
* have the bits set, so take its start as
|
||||
* the beginning of the desired range
|
||||
*
|
||||
* |--range with bits cleared----|
|
||||
* |
|
||||
* start
|
||||
*/
|
||||
*start_ret = state->start;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
*start_ret = start;
|
||||
/*
|
||||
* |---prev range---|---hole/unset---|---node range---|
|
||||
* |
|
||||
* start
|
||||
*
|
||||
* or
|
||||
*
|
||||
* |---hole/unset--||--first node--|
|
||||
* 0 |
|
||||
* start
|
||||
*/
|
||||
if (prev) {
|
||||
state = rb_entry(prev, struct extent_state,
|
||||
rb_node);
|
||||
*start_ret = state->end + 1;
|
||||
} else {
|
||||
*start_ret = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1719,10 +1780,10 @@ static noinline int lock_delalloc_pages(struct inode *inode,
|
||||
*/
|
||||
EXPORT_FOR_TESTS
|
||||
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
struct extent_io_tree *tree,
|
||||
struct page *locked_page, u64 *start,
|
||||
u64 *end)
|
||||
{
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
@ -2800,12 +2861,11 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
|
||||
* never fail. We're returning a bio right now but you can call btrfs_io_bio
|
||||
* for the appropriate container_of magic
|
||||
*/
|
||||
struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
|
||||
struct bio *btrfs_bio_alloc(u64 first_byte)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_iter.bi_sector = first_byte >> 9;
|
||||
btrfs_io_bio_init(btrfs_io_bio(bio));
|
||||
return bio;
|
||||
@ -2916,7 +2976,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
}
|
||||
}
|
||||
|
||||
bio = btrfs_bio_alloc(bdev, offset);
|
||||
bio = btrfs_bio_alloc(offset);
|
||||
bio_set_dev(bio, bdev);
|
||||
bio_add_page(bio, page, page_size, pg_offset);
|
||||
bio->bi_end_io = end_io_func;
|
||||
bio->bi_private = tree;
|
||||
@ -3204,21 +3265,10 @@ static inline void contiguous_readpages(struct extent_io_tree *tree,
|
||||
unsigned long *bio_flags,
|
||||
u64 *prev_em_start)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
|
||||
int index;
|
||||
|
||||
inode = pages[0]->mapping->host;
|
||||
while (1) {
|
||||
lock_extent(tree, start, end);
|
||||
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
|
||||
end - start + 1);
|
||||
if (!ordered)
|
||||
break;
|
||||
unlock_extent(tree, start, end);
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
|
||||
|
||||
for (index = 0; index < nr_pages; index++) {
|
||||
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
|
||||
@ -3234,22 +3284,12 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
||||
unsigned long *bio_flags,
|
||||
unsigned int read_flags)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
|
||||
u64 start = page_offset(page);
|
||||
u64 end = start + PAGE_SIZE - 1;
|
||||
int ret;
|
||||
|
||||
while (1) {
|
||||
lock_extent(tree, start, end);
|
||||
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
|
||||
PAGE_SIZE);
|
||||
if (!ordered)
|
||||
break;
|
||||
unlock_extent(tree, start, end);
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
|
||||
|
||||
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
|
||||
bio_flags, read_flags, NULL);
|
||||
@ -3290,7 +3330,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
|
||||
struct page *page, struct writeback_control *wbc,
|
||||
u64 delalloc_start, unsigned long *nr_written)
|
||||
{
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
u64 page_end = delalloc_start + PAGE_SIZE - 1;
|
||||
bool found;
|
||||
u64 delalloc_to_write = 0;
|
||||
@ -3300,8 +3339,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
|
||||
|
||||
|
||||
while (delalloc_end < page_end) {
|
||||
found = find_lock_delalloc_range(inode, tree,
|
||||
page,
|
||||
found = find_lock_delalloc_range(inode, page,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
if (!found) {
|
||||
@ -3310,7 +3348,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
|
||||
}
|
||||
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
|
||||
delalloc_end, &page_started, nr_written, wbc);
|
||||
/* File system has been set read-only */
|
||||
if (ret) {
|
||||
SetPageError(page);
|
||||
/*
|
||||
@ -4542,6 +4579,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct fiemap_cache cache = { 0 };
|
||||
struct ulist *roots;
|
||||
struct ulist *tmp_ulist;
|
||||
int end = 0;
|
||||
u64 em_start = 0;
|
||||
u64 em_len = 0;
|
||||
@ -4555,6 +4594,13 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
return -ENOMEM;
|
||||
path->leave_spinning = 1;
|
||||
|
||||
roots = ulist_alloc(GFP_KERNEL);
|
||||
tmp_ulist = ulist_alloc(GFP_KERNEL);
|
||||
if (!roots || !tmp_ulist) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free_ulist;
|
||||
}
|
||||
|
||||
start = round_down(start, btrfs_inode_sectorsize(inode));
|
||||
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
|
||||
|
||||
@ -4565,8 +4611,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
ret = btrfs_lookup_file_extent(NULL, root, path,
|
||||
btrfs_ino(BTRFS_I(inode)), -1, 0);
|
||||
if (ret < 0) {
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
goto out_free_ulist;
|
||||
} else {
|
||||
WARN_ON(!ret);
|
||||
if (ret == 1)
|
||||
@ -4675,7 +4720,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
*/
|
||||
ret = btrfs_check_shared(root,
|
||||
btrfs_ino(BTRFS_I(inode)),
|
||||
bytenr);
|
||||
bytenr, roots, tmp_ulist);
|
||||
if (ret < 0)
|
||||
goto out_free;
|
||||
if (ret)
|
||||
@ -4718,9 +4763,13 @@ out_free:
|
||||
ret = emit_last_fiemap_cache(fieinfo, &cache);
|
||||
free_extent_map(em);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
|
||||
&cached_state);
|
||||
|
||||
out_free_ulist:
|
||||
btrfs_free_path(path);
|
||||
ulist_free(roots);
|
||||
ulist_free(tmp_ulist);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4808,7 +4857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
|
||||
eb->bflags = 0;
|
||||
rwlock_init(&eb->lock);
|
||||
atomic_set(&eb->blocking_readers, 0);
|
||||
atomic_set(&eb->blocking_writers, 0);
|
||||
eb->blocking_writers = 0;
|
||||
eb->lock_nested = false;
|
||||
init_waitqueue_head(&eb->write_lock_wq);
|
||||
init_waitqueue_head(&eb->read_lock_wq);
|
||||
@ -4827,10 +4876,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
|
||||
BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
atomic_set(&eb->spinning_writers, 0);
|
||||
eb->spinning_writers = 0;
|
||||
atomic_set(&eb->spinning_readers, 0);
|
||||
atomic_set(&eb->read_locks, 0);
|
||||
atomic_set(&eb->write_locks, 0);
|
||||
eb->write_locks = 0;
|
||||
#endif
|
||||
|
||||
return eb;
|
||||
|
@ -167,7 +167,7 @@ struct extent_buffer {
|
||||
struct rcu_head rcu_head;
|
||||
pid_t lock_owner;
|
||||
|
||||
atomic_t blocking_writers;
|
||||
int blocking_writers;
|
||||
atomic_t blocking_readers;
|
||||
bool lock_nested;
|
||||
/* >= 0 if eb belongs to a log tree, -1 otherwise */
|
||||
@ -187,10 +187,10 @@ struct extent_buffer {
|
||||
wait_queue_head_t read_lock_wq;
|
||||
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
atomic_t spinning_writers;
|
||||
int spinning_writers;
|
||||
atomic_t spinning_readers;
|
||||
atomic_t read_locks;
|
||||
atomic_t write_locks;
|
||||
int write_locks;
|
||||
struct list_head leak_list;
|
||||
#endif
|
||||
};
|
||||
@ -497,7 +497,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
|
||||
u64 delalloc_end, struct page *locked_page,
|
||||
unsigned bits_to_clear,
|
||||
unsigned long page_ops);
|
||||
struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte);
|
||||
struct bio *btrfs_bio_alloc(u64 first_byte);
|
||||
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs);
|
||||
struct bio *btrfs_bio_clone(struct bio *bio);
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size);
|
||||
@ -549,7 +549,7 @@ int free_io_failure(struct extent_io_tree *failure_tree,
|
||||
struct extent_io_tree *io_tree,
|
||||
struct io_failure_record *rec);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
bool find_lock_delalloc_range(struct inode *inode, struct extent_io_tree *tree,
|
||||
bool find_lock_delalloc_range(struct inode *inode,
|
||||
struct page *locked_page, u64 *start,
|
||||
u64 *end);
|
||||
#endif
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
@ -22,9 +23,13 @@
|
||||
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
|
||||
PAGE_SIZE))
|
||||
|
||||
#define MAX_ORDERED_SUM_BYTES(fs_info) ((PAGE_SIZE - \
|
||||
sizeof(struct btrfs_ordered_sum)) / \
|
||||
sizeof(u32) * (fs_info)->sectorsize)
|
||||
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
|
||||
u16 csum_size)
|
||||
{
|
||||
u32 ncsums = (PAGE_SIZE - sizeof(struct btrfs_ordered_sum)) / csum_size;
|
||||
|
||||
return ncsums * fs_info->sectorsize;
|
||||
}
|
||||
|
||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
@ -144,7 +149,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u64 logical_offset, u32 *dst, int dio)
|
||||
u64 logical_offset, u8 *dst, int dio)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct bio_vec bvec;
|
||||
@ -182,7 +187,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
|
||||
}
|
||||
csum = btrfs_bio->csum;
|
||||
} else {
|
||||
csum = (u8 *)dst;
|
||||
csum = dst;
|
||||
}
|
||||
|
||||
if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
|
||||
@ -211,7 +216,7 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
|
||||
if (!dio)
|
||||
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
|
||||
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
|
||||
(u32 *)csum, nblocks);
|
||||
csum, nblocks);
|
||||
if (count)
|
||||
goto found;
|
||||
|
||||
@ -283,7 +288,8 @@ next:
|
||||
return 0;
|
||||
}
|
||||
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u32 *dst)
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u8 *dst)
|
||||
{
|
||||
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
|
||||
}
|
||||
@ -374,7 +380,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct btrfs_csum_item);
|
||||
while (start < csum_end) {
|
||||
size = min_t(size_t, csum_end - start,
|
||||
MAX_ORDERED_SUM_BYTES(fs_info));
|
||||
max_ordered_sum_bytes(fs_info, csum_size));
|
||||
sums = kzalloc(btrfs_ordered_sum_size(fs_info, size),
|
||||
GFP_NOFS);
|
||||
if (!sums) {
|
||||
@ -427,6 +433,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
||||
u64 file_start, int contig)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
struct btrfs_ordered_sum *sums;
|
||||
struct btrfs_ordered_extent *ordered = NULL;
|
||||
char *data;
|
||||
@ -439,6 +446,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
||||
int i;
|
||||
u64 offset;
|
||||
unsigned nofs_flag;
|
||||
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
|
||||
@ -459,6 +467,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
||||
sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
|
||||
index = 0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
if (!contig)
|
||||
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
|
||||
@ -498,17 +508,14 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
||||
index = 0;
|
||||
}
|
||||
|
||||
sums->sums[index] = ~(u32)0;
|
||||
crypto_shash_init(shash);
|
||||
data = kmap_atomic(bvec.bv_page);
|
||||
sums->sums[index]
|
||||
= btrfs_csum_data(data + bvec.bv_offset
|
||||
+ (i * fs_info->sectorsize),
|
||||
sums->sums[index],
|
||||
fs_info->sectorsize);
|
||||
crypto_shash_update(shash, data + bvec.bv_offset
|
||||
+ (i * fs_info->sectorsize),
|
||||
fs_info->sectorsize);
|
||||
kunmap_atomic(data);
|
||||
btrfs_csum_final(sums->sums[index],
|
||||
(char *)(sums->sums + index));
|
||||
index++;
|
||||
crypto_shash_final(shash, (char *)(sums->sums + index));
|
||||
index += csum_size;
|
||||
offset += fs_info->sectorsize;
|
||||
this_sum_bytes += fs_info->sectorsize;
|
||||
total_bytes += fs_info->sectorsize;
|
||||
@ -904,9 +911,9 @@ found:
|
||||
write_extent_buffer(leaf, sums->sums + index, (unsigned long)item,
|
||||
ins_size);
|
||||
|
||||
index += ins_size;
|
||||
ins_size /= csum_size;
|
||||
total_bytes += ins_size * fs_info->sectorsize;
|
||||
index += ins_size;
|
||||
|
||||
btrfs_mark_buffer_dirty(path->nodes[0]);
|
||||
if (total_bytes < sums->len) {
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "volumes.h"
|
||||
#include "qgroup.h"
|
||||
#include "compression.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
static struct kmem_cache *btrfs_inode_defrag_cachep;
|
||||
/*
|
||||
@ -1550,30 +1551,20 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
u64 lockstart, lockend;
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_start_write_no_snapshotting(root);
|
||||
if (!ret)
|
||||
return -ENOSPC;
|
||||
return -EAGAIN;
|
||||
|
||||
lockstart = round_down(pos, fs_info->sectorsize);
|
||||
lockend = round_up(pos + *write_bytes,
|
||||
fs_info->sectorsize) - 1;
|
||||
|
||||
while (1) {
|
||||
lock_extent(&inode->io_tree, lockstart, lockend);
|
||||
ordered = btrfs_lookup_ordered_range(inode, lockstart,
|
||||
lockend - lockstart + 1);
|
||||
if (!ordered) {
|
||||
break;
|
||||
}
|
||||
unlock_extent(&inode->io_tree, lockstart, lockend);
|
||||
btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
|
||||
lockend, NULL);
|
||||
|
||||
num_bytes = lockend - lockstart + 1;
|
||||
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
||||
@ -2721,6 +2712,11 @@ out_only_mutex:
|
||||
* for detecting, at fsync time, if the inode isn't yet in the
|
||||
* log tree or it's there but not up to date.
|
||||
*/
|
||||
struct timespec64 now = current_time(inode);
|
||||
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_mtime = now;
|
||||
inode->i_ctime = now;
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
err = PTR_ERR(trans);
|
||||
@ -2801,9 +2797,9 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
|
||||
}
|
||||
|
||||
enum {
|
||||
RANGE_BOUNDARY_WRITTEN_EXTENT = 0,
|
||||
RANGE_BOUNDARY_PREALLOC_EXTENT = 1,
|
||||
RANGE_BOUNDARY_HOLE = 2,
|
||||
RANGE_BOUNDARY_WRITTEN_EXTENT,
|
||||
RANGE_BOUNDARY_PREALLOC_EXTENT,
|
||||
RANGE_BOUNDARY_HOLE,
|
||||
};
|
||||
|
||||
static int btrfs_zero_range_check_range_boundary(struct inode *inode,
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include "extent_io.h"
|
||||
#include "inode-map.h"
|
||||
#include "volumes.h"
|
||||
#include "space-info.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
#define BITS_PER_BITMAP (PAGE_SIZE * 8UL)
|
||||
#define MAX_CACHE_BYTES_PER_GIG SZ_32K
|
||||
@ -465,9 +467,8 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
|
||||
if (index == 0)
|
||||
offset = sizeof(u32) * io_ctl->num_pages;
|
||||
|
||||
crc = btrfs_csum_data(io_ctl->orig + offset, crc,
|
||||
PAGE_SIZE - offset);
|
||||
btrfs_csum_final(crc, (u8 *)&crc);
|
||||
crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
|
||||
btrfs_crc32c_final(crc, (u8 *)&crc);
|
||||
io_ctl_unmap_page(io_ctl);
|
||||
tmp = page_address(io_ctl->pages[0]);
|
||||
tmp += index;
|
||||
@ -493,9 +494,8 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
|
||||
val = *tmp;
|
||||
|
||||
io_ctl_map_page(io_ctl, 0);
|
||||
crc = btrfs_csum_data(io_ctl->orig + offset, crc,
|
||||
PAGE_SIZE - offset);
|
||||
btrfs_csum_final(crc, (u8 *)&crc);
|
||||
crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
|
||||
btrfs_crc32c_final(crc, (u8 *)&crc);
|
||||
if (val != crc) {
|
||||
btrfs_err_rl(io_ctl->fs_info,
|
||||
"csum mismatch on free space cache");
|
||||
@ -3166,8 +3166,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
|
||||
space_info->bytes_readonly += reserved_bytes;
|
||||
block_group->reserved -= reserved_bytes;
|
||||
space_info->bytes_reserved -= reserved_bytes;
|
||||
spin_unlock(&space_info->lock);
|
||||
spin_unlock(&block_group->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -3358,7 +3358,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
|
||||
|
||||
if (cleanup) {
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
em_tree = &fs_info->mapping_tree.map_tree;
|
||||
em_tree = &fs_info->mapping_tree;
|
||||
write_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
|
||||
1);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "free-space-cache.h"
|
||||
#include "inode-map.h"
|
||||
#include "transaction.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
static int caching_kthread(void *data)
|
||||
{
|
||||
|
fs/btrfs/inode.c | 109
@ -47,6 +47,7 @@
|
||||
#include "props.h"
|
||||
#include "qgroup.h"
|
||||
#include "dedupe.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
struct btrfs_iget_args {
|
||||
struct btrfs_key *location;
|
||||
@ -1932,17 +1933,19 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
|
||||
u64 length = 0;
|
||||
u64 map_length;
|
||||
int ret;
|
||||
struct btrfs_io_geometry geom;
|
||||
|
||||
if (bio_flags & EXTENT_BIO_COMPRESSED)
|
||||
return 0;
|
||||
|
||||
length = bio->bi_iter.bi_size;
|
||||
map_length = length;
|
||||
ret = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
|
||||
NULL, 0);
|
||||
ret = btrfs_get_io_geometry(fs_info, btrfs_op(bio), logical, map_length,
|
||||
&geom);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (map_length < length + size)
|
||||
|
||||
if (geom.len < length + size)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@ -3203,16 +3206,23 @@ static int __readpage_endio_check(struct inode *inode,
|
||||
int icsum, struct page *page,
|
||||
int pgoff, u64 start, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
char *kaddr;
|
||||
u32 csum_expected;
|
||||
u32 csum = ~(u32)0;
|
||||
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
u8 *csum_expected;
|
||||
u8 csum[BTRFS_CSUM_SIZE];
|
||||
|
||||
csum_expected = *(((u32 *)io_bio->csum) + icsum);
|
||||
csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size;
|
||||
|
||||
kaddr = kmap_atomic(page);
|
||||
csum = btrfs_csum_data(kaddr + pgoff, csum, len);
|
||||
btrfs_csum_final(csum, (u8 *)&csum);
|
||||
if (csum != csum_expected)
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
crypto_shash_init(shash);
|
||||
crypto_shash_update(shash, kaddr + pgoff, len);
|
||||
crypto_shash_final(shash, csum);
|
||||
|
||||
if (memcmp(csum, csum_expected, csum_size))
|
||||
goto zeroit;
|
||||
|
||||
kunmap_atomic(kaddr);
|
||||
@ -3286,6 +3296,28 @@ void btrfs_add_delayed_iput(struct inode *inode)
|
||||
wake_up_process(fs_info->cleaner_kthread);
|
||||
}
|
||||
|
||||
static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_inode *inode)
|
||||
{
|
||||
list_del_init(&inode->delayed_iput);
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
iput(&inode->vfs_inode);
|
||||
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
|
||||
wake_up(&fs_info->delayed_iputs_wait);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
|
||||
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_inode *inode)
|
||||
{
|
||||
if (!list_empty(&inode->delayed_iput)) {
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
if (!list_empty(&inode->delayed_iput))
|
||||
run_delayed_iput_locked(fs_info, inode);
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
}
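btrfs_run_delayed_iput() above uses a double-checked pattern: a cheap unlocked emptiness check first, then a re-check under the lock before doing the work, so the common "nothing pending" case never takes the spinlock. A userspace sketch of the same idea using pthreads (assumed names, not the kernel code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool pending;    /* stands in for !list_empty(&inode->delayed_iput) */

static void do_work_locked(void)
{
        pending = false;
        printf("ran the delayed work\n");
}

static void maybe_run_work(void)
{
        if (pending) {                  /* racy fast-path check, like the unlocked list_empty() */
                pthread_mutex_lock(&lock);
                if (pending)            /* re-check under the lock */
                        do_work_locked();
                pthread_mutex_unlock(&lock);
        }
}

int main(void)
{
        pending = true;
        maybe_run_work();
        return 0;
}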
|
||||
|
||||
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
|
||||
@ -3295,12 +3327,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
|
||||
|
||||
inode = list_first_entry(&fs_info->delayed_iputs,
|
||||
struct btrfs_inode, delayed_iput);
|
||||
list_del_init(&inode->delayed_iput);
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
iput(&inode->vfs_inode);
|
||||
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
|
||||
wake_up(&fs_info->delayed_iputs_wait);
|
||||
spin_lock(&fs_info->delayed_iput_lock);
|
||||
run_delayed_iput_locked(fs_info, inode);
|
||||
}
|
||||
spin_unlock(&fs_info->delayed_iput_lock);
|
||||
}
|
||||
@ -3935,9 +3962,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_path *path;
|
||||
int ret = 0;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_key key;
|
||||
u64 index;
|
||||
u64 ino = btrfs_ino(inode);
|
||||
u64 dir_ino = btrfs_ino(dir);
|
||||
@ -3955,8 +3980,6 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
|
||||
ret = di ? PTR_ERR(di) : -ENOENT;
|
||||
goto err;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
btrfs_dir_item_key_to_cpu(leaf, di, &key);
|
||||
ret = btrfs_delete_one_dir_name(trans, root, path, di);
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -4009,6 +4032,17 @@ skip_backref:
|
||||
ret = 0;
|
||||
else if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
/*
|
||||
* If we have a pending delayed iput we could end up with the final iput
|
||||
* being run in btrfs-cleaner context. If we have enough of these built
|
||||
* up we can end up burning a lot of time in btrfs-cleaner without any
|
||||
* way to throttle the unlinks. Since we're currently holding a ref on
|
||||
* the inode we can run the delayed iput here without any issues as the
|
||||
* final iput won't be done until after we drop the ref we're currently
|
||||
* holding.
|
||||
*/
|
||||
btrfs_run_delayed_iput(fs_info, inode);
|
||||
err:
|
||||
btrfs_free_path(path);
|
||||
if (ret)
|
||||
@ -5008,21 +5042,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
|
||||
if (size <= hole_start)
|
||||
return 0;
|
||||
|
||||
while (1) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
lock_extent_bits(io_tree, hole_start, block_end - 1,
|
||||
&cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), hole_start,
|
||||
block_end - hole_start);
|
||||
if (!ordered)
|
||||
break;
|
||||
unlock_extent_cached(io_tree, hole_start, block_end - 1,
|
||||
&cached_state);
|
||||
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
|
||||
block_end - 1, &cached_state);
|
||||
cur_offset = hole_start;
|
||||
while (1) {
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
|
||||
@ -8318,22 +8339,21 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
|
||||
struct bio *orig_bio = dip->orig_bio;
|
||||
u64 start_sector = orig_bio->bi_iter.bi_sector;
|
||||
u64 file_offset = dip->logical_offset;
|
||||
u64 map_length;
|
||||
int async_submit = 0;
|
||||
u64 submit_len;
|
||||
int clone_offset = 0;
|
||||
int clone_len;
|
||||
int ret;
|
||||
blk_status_t status;
|
||||
struct btrfs_io_geometry geom;
|
||||
|
||||
map_length = orig_bio->bi_iter.bi_size;
|
||||
submit_len = map_length;
|
||||
ret = btrfs_map_block(fs_info, btrfs_op(orig_bio), start_sector << 9,
|
||||
&map_length, NULL, 0);
|
||||
submit_len = orig_bio->bi_iter.bi_size;
|
||||
ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
|
||||
start_sector << 9, submit_len, &geom);
|
||||
if (ret)
|
||||
return -EIO;
|
||||
|
||||
if (map_length >= submit_len) {
|
||||
if (geom.len >= submit_len) {
|
||||
bio = orig_bio;
|
||||
dip->flags |= BTRFS_DIO_ORIG_BIO_SUBMITTED;
|
||||
goto submit;
|
||||
@ -8346,10 +8366,10 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
|
||||
async_submit = 1;
|
||||
|
||||
/* bio split */
|
||||
ASSERT(map_length <= INT_MAX);
|
||||
ASSERT(geom.len <= INT_MAX);
|
||||
atomic_inc(&dip->pending_bios);
|
||||
do {
|
||||
clone_len = min_t(int, submit_len, map_length);
|
||||
clone_len = min_t(int, submit_len, geom.len);
|
||||
|
||||
/*
|
||||
* This will never fail as it's passing GPF_NOFS and
|
||||
@ -8386,9 +8406,8 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
|
||||
start_sector += clone_len >> 9;
|
||||
file_offset += clone_len;
|
||||
|
||||
map_length = submit_len;
|
||||
ret = btrfs_map_block(fs_info, btrfs_op(orig_bio),
|
||||
start_sector << 9, &map_length, NULL, 0);
|
||||
ret = btrfs_get_io_geometry(fs_info, btrfs_op(orig_bio),
|
||||
start_sector << 9, submit_len, &geom);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
} while (submit_len > 0);
|
||||
|
@ -43,6 +43,8 @@
|
||||
#include "qgroup.h"
|
||||
#include "tree-log.h"
|
||||
#include "compression.h"
|
||||
#include "space-info.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
|
||||
@ -3993,6 +3995,27 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
|
||||
if (!same_inode)
|
||||
inode_dio_wait(inode_out);
|
||||
|
||||
/*
|
||||
 * Workaround to make sure NOCOW buffered writes reach disk as NOCOW.
|
||||
*
|
||||
* Btrfs' back references do not have a block level granularity, they
|
||||
* work at the whole extent level.
|
||||
* NOCOW buffered write without data space reserved may not be able
|
||||
 * to fall back to CoW due to lack of data space, and thus could cause
|
||||
* data loss.
|
||||
*
|
||||
* Here we take a shortcut by flushing the whole inode, so that all
|
||||
 * nocow writes reach disk as nocow before we increase the
|
||||
* reference of the extent. We could do better by only flushing NOCOW
|
||||
* data, but that needs extra accounting.
|
||||
*
|
||||
* Also we don't need to check ASYNC_EXTENT, as async extent will be
|
||||
* CoWed anyway, not affecting nocow part.
|
||||
*/
|
||||
ret = filemap_flush(inode_in->i_mapping);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
|
||||
wb_len);
|
||||
if (ret < 0)
|
||||
|
@ -15,19 +15,19 @@
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
|
||||
{
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
atomic_inc(&eb->spinning_writers);
|
||||
WARN_ON(eb->spinning_writers);
|
||||
eb->spinning_writers++;
|
||||
}
|
||||
|
||||
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
|
||||
{
|
||||
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
|
||||
atomic_dec(&eb->spinning_writers);
|
||||
WARN_ON(eb->spinning_writers != 1);
|
||||
eb->spinning_writers--;
|
||||
}
|
||||
|
||||
static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
|
||||
{
|
||||
WARN_ON(atomic_read(&eb->spinning_writers));
|
||||
WARN_ON(eb->spinning_writers);
|
||||
}
|
||||
|
||||
static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
|
||||
@ -58,17 +58,17 @@ static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
|
||||
|
||||
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
|
||||
{
|
||||
atomic_inc(&eb->write_locks);
|
||||
eb->write_locks++;
|
||||
}
|
||||
|
||||
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
|
||||
{
|
||||
atomic_dec(&eb->write_locks);
|
||||
eb->write_locks--;
|
||||
}
|
||||
|
||||
void btrfs_assert_tree_locked(struct extent_buffer *eb)
|
||||
{
|
||||
BUG_ON(!atomic_read(&eb->write_locks));
|
||||
BUG_ON(!eb->write_locks);
|
||||
}
|
||||
|
||||
#else
|
||||
@ -111,10 +111,10 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
if (atomic_read(&eb->blocking_writers) == 0) {
|
||||
if (eb->blocking_writers == 0) {
|
||||
btrfs_assert_spinning_writers_put(eb);
|
||||
btrfs_assert_tree_locked(eb);
|
||||
atomic_inc(&eb->blocking_writers);
|
||||
eb->blocking_writers++;
|
||||
write_unlock(&eb->lock);
|
||||
}
|
||||
}
|
||||
@ -148,12 +148,11 @@ void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
|
||||
*/
|
||||
if (eb->lock_nested && current->pid == eb->lock_owner)
|
||||
return;
|
||||
BUG_ON(atomic_read(&eb->blocking_writers) != 1);
|
||||
write_lock(&eb->lock);
|
||||
BUG_ON(eb->blocking_writers != 1);
|
||||
btrfs_assert_spinning_writers_get(eb);
|
||||
/* atomic_dec_and_test implies a barrier */
|
||||
if (atomic_dec_and_test(&eb->blocking_writers))
|
||||
cond_wake_up_nomb(&eb->write_lock_wq);
|
||||
if (--eb->blocking_writers == 0)
|
||||
cond_wake_up(&eb->write_lock_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -167,12 +166,10 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
|
||||
if (trace_btrfs_tree_read_lock_enabled())
|
||||
start_ns = ktime_get_ns();
|
||||
again:
|
||||
BUG_ON(!atomic_read(&eb->blocking_writers) &&
|
||||
current->pid == eb->lock_owner);
|
||||
|
||||
read_lock(&eb->lock);
|
||||
if (atomic_read(&eb->blocking_writers) &&
|
||||
current->pid == eb->lock_owner) {
|
||||
BUG_ON(eb->blocking_writers == 0 &&
|
||||
current->pid == eb->lock_owner);
|
||||
if (eb->blocking_writers && current->pid == eb->lock_owner) {
|
||||
/*
|
||||
* This extent is already write-locked by our thread. We allow
|
||||
* an additional read lock to be added because it's for the same
|
||||
@ -185,10 +182,10 @@ again:
|
||||
trace_btrfs_tree_read_lock(eb, start_ns);
|
||||
return;
|
||||
}
|
||||
if (atomic_read(&eb->blocking_writers)) {
|
||||
if (eb->blocking_writers) {
|
||||
read_unlock(&eb->lock);
|
||||
wait_event(eb->write_lock_wq,
|
||||
atomic_read(&eb->blocking_writers) == 0);
|
||||
eb->blocking_writers == 0);
|
||||
goto again;
|
||||
}
|
||||
btrfs_assert_tree_read_locks_get(eb);
|
||||
@ -203,11 +200,11 @@ again:
|
||||
*/
|
||||
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
|
||||
{
|
||||
if (atomic_read(&eb->blocking_writers))
|
||||
if (eb->blocking_writers)
|
||||
return 0;
|
||||
|
||||
read_lock(&eb->lock);
|
||||
if (atomic_read(&eb->blocking_writers)) {
|
||||
if (eb->blocking_writers) {
|
||||
read_unlock(&eb->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -223,13 +220,13 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
|
||||
*/
|
||||
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
|
||||
{
|
||||
if (atomic_read(&eb->blocking_writers))
|
||||
if (eb->blocking_writers)
|
||||
return 0;
|
||||
|
||||
if (!read_trylock(&eb->lock))
|
||||
return 0;
|
||||
|
||||
if (atomic_read(&eb->blocking_writers)) {
|
||||
if (eb->blocking_writers) {
|
||||
read_unlock(&eb->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -245,13 +242,11 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
|
||||
*/
|
||||
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
|
||||
{
|
||||
if (atomic_read(&eb->blocking_writers) ||
|
||||
atomic_read(&eb->blocking_readers))
|
||||
if (eb->blocking_writers || atomic_read(&eb->blocking_readers))
|
||||
return 0;
|
||||
|
||||
write_lock(&eb->lock);
|
||||
if (atomic_read(&eb->blocking_writers) ||
|
||||
atomic_read(&eb->blocking_readers)) {
|
||||
if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) {
|
||||
write_unlock(&eb->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -322,10 +317,9 @@ void btrfs_tree_lock(struct extent_buffer *eb)
|
||||
WARN_ON(eb->lock_owner == current->pid);
|
||||
again:
|
||||
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
|
||||
wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
|
||||
wait_event(eb->write_lock_wq, eb->blocking_writers == 0);
|
||||
write_lock(&eb->lock);
|
||||
if (atomic_read(&eb->blocking_readers) ||
|
||||
atomic_read(&eb->blocking_writers)) {
|
||||
if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) {
|
||||
write_unlock(&eb->lock);
|
||||
goto again;
|
||||
}
|
||||
@ -340,7 +334,7 @@ again:
|
||||
*/
|
||||
void btrfs_tree_unlock(struct extent_buffer *eb)
|
||||
{
|
||||
int blockers = atomic_read(&eb->blocking_writers);
|
||||
int blockers = eb->blocking_writers;
|
||||
|
||||
BUG_ON(blockers > 1);
|
||||
|
||||
@ -351,7 +345,7 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
|
||||
|
||||
if (blockers) {
|
||||
btrfs_assert_no_spinning_writers(eb);
|
||||
atomic_dec(&eb->blocking_writers);
|
||||
eb->blocking_writers--;
|
||||
/* Use the lighter barrier after atomic */
|
||||
smp_mb__after_atomic();
|
||||
cond_wake_up_nomb(&eb->write_lock_wq);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "extent_io.h"
|
||||
#include "disk-io.h"
|
||||
#include "compression.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
static struct kmem_cache *btrfs_ordered_extent_cache;
|
||||
|
||||
@ -924,14 +925,16 @@ out:
|
||||
* be reclaimed before their checksum is actually put into the btree
|
||||
*/
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u32 *sum, int len)
|
||||
u8 *sum, int len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_ordered_sum *ordered_sum;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
|
||||
unsigned long num_sectors;
|
||||
unsigned long i;
|
||||
u32 sectorsize = btrfs_inode_sectorsize(inode);
|
||||
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||
int index = 0;
|
||||
|
||||
ordered = btrfs_lookup_ordered_extent(inode, offset);
|
||||
@ -947,10 +950,10 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
num_sectors = ordered_sum->len >>
|
||||
inode->i_sb->s_blocksize_bits;
|
||||
num_sectors = min_t(int, len - index, num_sectors - i);
|
||||
memcpy(sum + index, ordered_sum->sums + i,
|
||||
num_sectors);
|
||||
memcpy(sum + index, ordered_sum->sums + i * csum_size,
|
||||
num_sectors * csum_size);
|
||||
|
||||
index += (int)num_sectors;
|
||||
index += (int)num_sectors * csum_size;
|
||||
if (index == len)
|
||||
goto out;
|
||||
disk_bytenr += num_sectors * sectorsize;
|
||||
@ -962,6 +965,51 @@ out:
|
||||
return index;
|
||||
}
|
||||
|
||||
/*
|
||||
 * btrfs_lock_and_flush_ordered_range - Lock the passed range and ensure all pending
|
||||
* ordered extents in it are run to completion.
|
||||
*
|
||||
* @tree: IO tree used for locking out other users of the range
|
||||
* @inode: Inode whose ordered tree is to be searched
|
||||
* @start: Beginning of range to flush
|
||||
* @end: Last byte of range to lock
|
||||
* @cached_state: If passed, will return the extent state responsible for the
|
||||
* locked range. It's the caller's responsibility to free the cached state.
|
||||
*
|
||||
* This function always returns with the given range locked, ensuring after it's
|
||||
 * called no ordered extent can be pending.
|
||||
*/
|
||||
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct extent_state *cachedp = NULL;
|
||||
|
||||
if (cached_state)
|
||||
cachedp = *cached_state;
|
||||
|
||||
while (1) {
|
||||
lock_extent_bits(tree, start, end, &cachedp);
|
||||
ordered = btrfs_lookup_ordered_range(inode, start,
|
||||
end - start + 1);
|
||||
if (!ordered) {
|
||||
/*
|
||||
* If no external cached_state has been passed then
|
||||
* decrement the extra ref taken for cachedp since we
|
||||
* aren't exposing it outside of this function
|
||||
*/
|
||||
if (!cached_state)
|
||||
refcount_dec(&cachedp->refs);
|
||||
break;
|
||||
}
|
||||
unlock_extent_cached(tree, start, end, &cachedp);
|
||||
btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
}
|
||||
|
||||
int __init ordered_data_init(void)
|
||||
{
|
||||
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
|
||||
|
@ -23,7 +23,7 @@ struct btrfs_ordered_sum {
|
||||
int len;
|
||||
struct list_head list;
|
||||
/* last field is a variable length array of csums */
|
||||
u32 sums[];
|
||||
u8 sums[];
|
||||
};
|
||||
|
||||
/*
|
||||
@ -183,11 +183,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
|
||||
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
|
||||
struct btrfs_ordered_extent *ordered);
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u32 *sum, int len);
|
||||
u8 *sum, int len);
|
||||
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state);
|
||||
int __init ordered_data_init(void);
|
||||
void __cold ordered_data_exit(void);
|
||||
|
||||
|
@ -153,11 +153,11 @@ static void print_eb_refs_lock(struct extent_buffer *eb)
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
btrfs_info(eb->fs_info,
|
||||
"refs %u lock (w:%d r:%d bw:%d br:%d sw:%d sr:%d) lock_owner %u current %u",
|
||||
atomic_read(&eb->refs), atomic_read(&eb->write_locks),
|
||||
atomic_read(&eb->refs), eb->write_locks,
|
||||
atomic_read(&eb->read_locks),
|
||||
atomic_read(&eb->blocking_writers),
|
||||
eb->blocking_writers,
|
||||
atomic_read(&eb->blocking_readers),
|
||||
atomic_read(&eb->spinning_writers),
|
||||
eb->spinning_writers,
|
||||
atomic_read(&eb->spinning_readers),
|
||||
eb->lock_owner, current->pid);
|
||||
#endif
|
||||
|
@ -257,11 +257,7 @@ static int prop_compression_validate(const char *value, size_t len)
|
||||
if (!value)
|
||||
return 0;
|
||||
|
||||
if (!strncmp("lzo", value, 3))
|
||||
return 0;
|
||||
else if (!strncmp("zlib", value, 4))
|
||||
return 0;
|
||||
else if (!strncmp("zstd", value, 4))
|
||||
if (btrfs_compress_is_valid_type(value, len))
|
||||
return 0;
|
||||
|
||||
return -EINVAL;
|
||||
@ -341,7 +337,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
|
||||
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
|
||||
const struct prop_handler *h = &prop_handlers[i];
|
||||
const char *value;
|
||||
u64 num_bytes;
|
||||
u64 num_bytes = 0;
|
||||
|
||||
if (!h->inheritable)
|
||||
continue;
|
||||
|
@ -2614,6 +2614,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
int ret = 0;
|
||||
int i;
|
||||
u64 *i_qgroups;
|
||||
bool committing = false;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *srcgroup;
|
||||
@ -2621,7 +2622,25 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
u32 level_size = 0;
|
||||
u64 nums;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
/*
|
||||
* There are only two callers of this function.
|
||||
*
|
||||
* One in create_subvol() in the ioctl context, which needs to hold
|
||||
* the qgroup_ioctl_lock.
|
||||
*
|
||||
* The other one in create_pending_snapshot() where no other qgroup
|
||||
* code can modify the fs as they all need to either start a new trans
|
||||
 * or hold a trans handle, thus we don't need to hold
|
||||
* qgroup_ioctl_lock.
|
||||
* This would avoid long and complex lock chain and make lockdep happy.
|
||||
*/
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
|
||||
committing = true;
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
if (!committing)
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
|
||||
goto out;
|
||||
|
||||
@ -2785,7 +2804,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
unlock:
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
out:
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
if (!committing)
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#ifndef BTRFS_RAID56_H
|
||||
#define BTRFS_RAID56_H
|
||||
|
||||
static inline int nr_parity_stripes(struct map_lookup *map)
|
||||
static inline int nr_parity_stripes(const struct map_lookup *map)
|
||||
{
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID5)
|
||||
return 1;
|
||||
@ -17,7 +17,7 @@ static inline int nr_parity_stripes(struct map_lookup *map)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int nr_data_stripes(struct map_lookup *map)
|
||||
static inline int nr_data_stripes(const struct map_lookup *map)
|
||||
{
|
||||
return map->num_stripes - nr_parity_stripes(map);
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "inode-map.h"
|
||||
#include "qgroup.h"
|
||||
#include "print-tree.h"
|
||||
#include "delalloc-space.h"
|
||||
|
||||
/*
|
||||
* backref_node, mapping_node and tree_block start with this
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include "transaction.h"
|
||||
#include "disk-io.h"
|
||||
#include "print-tree.h"
|
||||
#include "qgroup.h"
|
||||
#include "space-info.h"
|
||||
|
||||
/*
|
||||
 * Read a root item from the tree. In case we detect a root item smaller than
|
||||
@ -497,3 +499,57 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
|
||||
btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
|
||||
spin_unlock(&root->root_item_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_subvolume_reserve_metadata() - reserve space for subvolume operation
|
||||
* root: the root of the parent directory
|
||||
* rsv: block reservation
|
||||
* items: the number of items that we need do reservation
|
||||
* use_global_rsv: allow fallback to the global block reservation
|
||||
*
|
||||
* This function is used to reserve the space for snapshot/subvolume
|
||||
 * creation and deletion. Those operations are different from the
|
||||
* common file/directory operations, they change two fs/file trees
|
||||
* and root tree, the number of items that the qgroup reserves is
|
||||
* different with the free space reservation. So we can not use
|
||||
* the space reservation mechanism in start_transaction().
|
||||
*/
|
||||
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *rsv, int items,
|
||||
bool use_global_rsv)
|
||||
{
|
||||
u64 qgroup_num_bytes = 0;
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
|
||||
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
|
||||
/* One for parent inode, two for dir entries */
|
||||
qgroup_num_bytes = 3 * fs_info->nodesize;
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root,
|
||||
qgroup_num_bytes, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
|
||||
rsv->space_info = btrfs_find_space_info(fs_info,
|
||||
BTRFS_BLOCK_GROUP_METADATA);
|
||||
ret = btrfs_block_rsv_add(root, rsv, num_bytes,
|
||||
BTRFS_RESERVE_FLUSH_ALL);
|
||||
|
||||
if (ret == -ENOSPC && use_global_rsv)
|
||||
ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, true);
|
||||
|
||||
if (ret && qgroup_num_bytes)
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv)
|
||||
{
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "volumes.h"
|
||||
#include "disk-io.h"
|
||||
@ -1787,11 +1788,12 @@ static int scrub_checksum(struct scrub_block *sblock)
|
||||
static int scrub_checksum_data(struct scrub_block *sblock)
|
||||
{
|
||||
struct scrub_ctx *sctx = sblock->sctx;
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
u8 csum[BTRFS_CSUM_SIZE];
|
||||
u8 *on_disk_csum;
|
||||
struct page *page;
|
||||
void *buffer;
|
||||
u32 crc = ~(u32)0;
|
||||
u64 len;
|
||||
int index;
|
||||
|
||||
@ -1799,6 +1801,9 @@ static int scrub_checksum_data(struct scrub_block *sblock)
|
||||
if (!sblock->pagev[0]->have_csum)
|
||||
return 0;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
on_disk_csum = sblock->pagev[0]->csum;
|
||||
page = sblock->pagev[0]->page;
|
||||
buffer = kmap_atomic(page);
|
||||
@ -1808,7 +1813,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
|
||||
for (;;) {
|
||||
u64 l = min_t(u64, len, PAGE_SIZE);
|
||||
|
||||
crc = btrfs_csum_data(buffer, crc, l);
|
||||
crypto_shash_update(shash, buffer, l);
|
||||
kunmap_atomic(buffer);
|
||||
len -= l;
|
||||
if (len == 0)
|
||||
@ -1820,7 +1825,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
|
||||
buffer = kmap_atomic(page);
|
||||
}
|
||||
|
||||
btrfs_csum_final(crc, csum);
|
||||
crypto_shash_final(shash, csum);
|
||||
if (memcmp(csum, on_disk_csum, sctx->csum_size))
|
||||
sblock->checksum_error = 1;
|
||||
|
||||
@ -1832,16 +1837,19 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
|
||||
struct scrub_ctx *sctx = sblock->sctx;
|
||||
struct btrfs_header *h;
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
u8 calculated_csum[BTRFS_CSUM_SIZE];
|
||||
u8 on_disk_csum[BTRFS_CSUM_SIZE];
|
||||
struct page *page;
|
||||
void *mapped_buffer;
|
||||
u64 mapped_size;
|
||||
void *p;
|
||||
u32 crc = ~(u32)0;
|
||||
u64 len;
|
||||
int index;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
BUG_ON(sblock->page_count < 1);
|
||||
page = sblock->pagev[0]->page;
|
||||
mapped_buffer = kmap_atomic(page);
|
||||
@ -1875,7 +1883,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
|
||||
for (;;) {
|
||||
u64 l = min_t(u64, len, mapped_size);
|
||||
|
||||
crc = btrfs_csum_data(p, crc, l);
|
||||
crypto_shash_update(shash, p, l);
|
||||
kunmap_atomic(mapped_buffer);
|
||||
len -= l;
|
||||
if (len == 0)
|
||||
@ -1889,7 +1897,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
|
||||
p = mapped_buffer;
|
||||
}
|
||||
|
||||
btrfs_csum_final(crc, calculated_csum);
|
||||
crypto_shash_final(shash, calculated_csum);
|
||||
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
|
||||
sblock->checksum_error = 1;
|
||||
|
||||
@ -1900,18 +1908,22 @@ static int scrub_checksum_super(struct scrub_block *sblock)
|
||||
{
|
||||
struct btrfs_super_block *s;
|
||||
struct scrub_ctx *sctx = sblock->sctx;
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
u8 calculated_csum[BTRFS_CSUM_SIZE];
|
||||
u8 on_disk_csum[BTRFS_CSUM_SIZE];
|
||||
struct page *page;
|
||||
void *mapped_buffer;
|
||||
u64 mapped_size;
|
||||
void *p;
|
||||
u32 crc = ~(u32)0;
|
||||
int fail_gen = 0;
|
||||
int fail_cor = 0;
|
||||
u64 len;
|
||||
int index;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
|
||||
BUG_ON(sblock->page_count < 1);
|
||||
page = sblock->pagev[0]->page;
|
||||
mapped_buffer = kmap_atomic(page);
|
||||
@ -1934,7 +1946,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
|
||||
for (;;) {
|
||||
u64 l = min_t(u64, len, mapped_size);
|
||||
|
||||
crc = btrfs_csum_data(p, crc, l);
|
||||
crypto_shash_update(shash, p, l);
|
||||
kunmap_atomic(mapped_buffer);
|
||||
len -= l;
|
||||
if (len == 0)
|
||||
@ -1948,7 +1960,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
|
||||
p = mapped_buffer;
|
||||
}
|
||||
|
||||
btrfs_csum_final(crc, calculated_csum);
|
||||
crypto_shash_final(shash, calculated_csum);
|
||||
if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size))
|
||||
++fail_cor;
|
||||
|
||||
@ -2448,7 +2460,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u8 *csum)
|
||||
ASSERT(index < UINT_MAX);
|
||||
|
||||
num_sectors = sum->len / sctx->fs_info->sectorsize;
|
||||
memcpy(csum, sum->sums + index, sctx->csum_size);
|
||||
memcpy(csum, sum->sums + index * sctx->csum_size, sctx->csum_size);
|
||||
if (index == num_sectors - 1) {
|
||||
list_del(&sum->list);
|
||||
kfree(sum);
|
||||
@ -2660,18 +2672,18 @@ static int get_raid56_logic_offset(u64 physical, int num,
|
||||
u64 last_offset;
|
||||
u32 stripe_index;
|
||||
u32 rot;
|
||||
const int data_stripes = nr_data_stripes(map);
|
||||
|
||||
last_offset = (physical - map->stripes[num].physical) *
|
||||
nr_data_stripes(map);
|
||||
last_offset = (physical - map->stripes[num].physical) * data_stripes;
|
||||
if (stripe_start)
|
||||
*stripe_start = last_offset;
|
||||
|
||||
*offset = last_offset;
|
||||
for (i = 0; i < nr_data_stripes(map); i++) {
|
||||
for (i = 0; i < data_stripes; i++) {
|
||||
*offset = last_offset + i * map->stripe_len;
|
||||
|
||||
stripe_nr = div64_u64(*offset, map->stripe_len);
|
||||
stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
|
||||
stripe_nr = div_u64(stripe_nr, data_stripes);
|
||||
|
||||
/* Work out the disk rotation on this stripe-set */
|
||||
stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
|
||||
@ -3079,7 +3091,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
offset = map->stripe_len * (num / map->sub_stripes);
|
||||
increment = map->stripe_len * factor;
|
||||
mirror_num = num % map->sub_stripes + 1;
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
|
||||
increment = map->stripe_len;
|
||||
mirror_num = num % map->num_stripes + 1;
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
|
||||
@ -3410,15 +3422,15 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
|
||||
struct btrfs_block_group_cache *cache)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = sctx->fs_info;
|
||||
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
|
||||
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
|
||||
struct map_lookup *map;
|
||||
struct extent_map *em;
|
||||
int i;
|
||||
int ret = 0;
|
||||
|
||||
read_lock(&map_tree->map_tree.lock);
|
||||
em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
|
||||
read_unlock(&map_tree->map_tree.lock);
|
||||
read_lock(&map_tree->lock);
|
||||
em = lookup_extent_mapping(map_tree, chunk_offset, 1);
|
||||
read_unlock(&map_tree->lock);
|
||||
|
||||
if (!em) {
|
||||
/*
|
||||
|
@ -686,7 +686,7 @@ static int send_cmd(struct send_ctx *sctx)
|
||||
hdr->len = cpu_to_le32(sctx->send_size - sizeof(*hdr));
|
||||
hdr->crc = 0;
|
||||
|
||||
crc = crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
|
||||
crc = btrfs_crc32c(0, (unsigned char *)sctx->send_buf, sctx->send_size);
|
||||
hdr->crc = cpu_to_le32(crc);
|
||||
|
||||
ret = write_buf(sctx->send_filp, sctx->send_buf, sctx->send_size,
|
||||
@ -6929,9 +6929,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
btrfs_warn_rl(fs_info,
|
||||
"cannot run send because a balance operation is in progress");
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
fs_info->send_in_progress++;
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
|
||||
current->journal_info = BTRFS_SEND_TRANS_STUB;
|
||||
ret = send_subvol(sctx);
|
||||
current->journal_info = NULL;
|
||||
mutex_lock(&fs_info->balance_mutex);
|
||||
fs_info->send_in_progress--;
|
||||
mutex_unlock(&fs_info->balance_mutex);
|
||||
if (ret < 0)
|
||||
goto out;
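The send hunk above makes send and balance mutually exclusive: one mutex guards both the "balance running" flag and the count of sends in progress. A userspace sketch of the same scheme with pthreads (assumed names, for illustration only):

#include <pthread.h>
#include <stdbool.h>
#include <errno.h>

static pthread_mutex_t balance_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool balance_running;
static int send_in_progress;

static int start_send(void)
{
        pthread_mutex_lock(&balance_mutex);
        if (balance_running) {
                pthread_mutex_unlock(&balance_mutex);
                return -EAGAIN;         /* caller may retry later */
        }
        send_in_progress++;
        pthread_mutex_unlock(&balance_mutex);
        return 0;
}

static void finish_send(void)
{
        pthread_mutex_lock(&balance_mutex);
        send_in_progress--;
        pthread_mutex_unlock(&balance_mutex);
}

int main(void)
{
        if (start_send() == 0)
                finish_send();
        return 0;
}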
|
||||
|
||||
|
fs/btrfs/space-info.c | 1094 (new file; diff too large to display)
fs/btrfs/space-info.h | 133 (new file)
@ -0,0 +1,133 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_SPACE_INFO_H
|
||||
#define BTRFS_SPACE_INFO_H
|
||||
|
||||
struct btrfs_space_info {
|
||||
spinlock_t lock;
|
||||
|
||||
u64 total_bytes; /* total bytes in the space,
|
||||
this doesn't take mirrors into account */
|
||||
u64 bytes_used; /* total bytes used,
|
||||
this doesn't take mirrors into account */
|
||||
u64 bytes_pinned; /* total bytes pinned, will be freed when the
|
||||
transaction finishes */
|
||||
u64 bytes_reserved; /* total bytes the allocator has reserved for
|
||||
current allocations */
|
||||
u64 bytes_may_use; /* number of bytes that may be used for
|
||||
delalloc/allocations */
|
||||
u64 bytes_readonly; /* total bytes that are read only */
|
||||
|
||||
u64 max_extent_size; /* This will hold the maximum extent size of
|
||||
the space info if we had an ENOSPC in the
|
||||
allocator. */
|
||||
|
||||
unsigned int full:1; /* indicates that we cannot allocate any more
|
||||
chunks for this space */
|
||||
unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
|
||||
|
||||
unsigned int flush:1; /* set if we are trying to make space */
|
||||
|
||||
unsigned int force_alloc; /* set if we need to force a chunk
|
||||
alloc for this space */
|
||||
|
||||
u64 disk_used; /* total bytes used on disk */
|
||||
u64 disk_total; /* total bytes on disk, takes mirrors into
|
||||
account */
|
||||
|
||||
u64 flags;
|
||||
|
||||
/*
|
||||
* bytes_pinned is kept in line with what is actually pinned, as in
|
||||
* we've called update_block_group and dropped the bytes_used counter
|
||||
* and increased the bytes_pinned counter. However this means that
|
||||
* bytes_pinned does not reflect the bytes that will be pinned once the
|
||||
* delayed refs are flushed, so this counter is inc'ed every time we
|
||||
* call btrfs_free_extent so it is a realtime count of what will be
|
||||
* freed once the transaction is committed. It will be zeroed every
|
||||
* time the transaction commits.
|
||||
*/
|
||||
struct percpu_counter total_bytes_pinned;
|
||||
|
||||
struct list_head list;
|
||||
/* Protected by the spinlock 'lock'. */
|
||||
struct list_head ro_bgs;
|
||||
struct list_head priority_tickets;
|
||||
struct list_head tickets;
|
||||
/*
|
||||
* tickets_id just indicates the next ticket will be handled, so note
|
||||
* it's not stored per ticket.
|
||||
*/
|
||||
u64 tickets_id;
|
||||
|
||||
struct rw_semaphore groups_sem;
|
||||
/* for block groups in our same type */
|
||||
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
|
||||
wait_queue_head_t wait;
|
||||
|
||||
struct kobject kobj;
|
||||
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
|
||||
};
|
||||
|
||||
struct reserve_ticket {
|
||||
u64 orig_bytes;
|
||||
u64 bytes;
|
||||
int error;
|
||||
struct list_head list;
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
|
||||
{
|
||||
return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
|
||||
(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* Declare a helper function to detect underflow of various space info members
|
||||
*/
|
||||
#define DECLARE_SPACE_INFO_UPDATE(name) \
|
||||
static inline void \
|
||||
btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
|
||||
struct btrfs_space_info *sinfo, \
|
||||
s64 bytes) \
|
||||
{ \
|
||||
lockdep_assert_held(&sinfo->lock); \
|
||||
trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \
|
||||
if (bytes < 0 && sinfo->name < -bytes) { \
|
||||
WARN_ON(1); \
|
||||
sinfo->name = 0; \
|
||||
return; \
|
||||
} \
|
||||
sinfo->name += bytes; \
|
||||
}
|
||||
|
||||
DECLARE_SPACE_INFO_UPDATE(bytes_may_use);
|
||||
DECLARE_SPACE_INFO_UPDATE(bytes_pinned);
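To make the generated helpers concrete, this is what DECLARE_SPACE_INFO_UPDATE(bytes_may_use) expands to, hand-expanded for illustration from the macro above (the real definition stays the macro):

static inline void
btrfs_space_info_update_bytes_may_use(struct btrfs_fs_info *fs_info,
                                      struct btrfs_space_info *sinfo,
                                      s64 bytes)
{
        lockdep_assert_held(&sinfo->lock);
        trace_update_bytes_may_use(fs_info, sinfo, sinfo->bytes_may_use, bytes);
        if (bytes < 0 && sinfo->bytes_may_use < -bytes) {
                WARN_ON(1);             /* underflow: clamp instead of wrapping */
                sinfo->bytes_may_use = 0;
                return;
        }
        sinfo->bytes_may_use += bytes;
}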
|
||||
|
||||
void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
u64 num_bytes);
|
||||
void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
u64 num_bytes);
|
||||
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
u64 total_bytes, u64 bytes_used,
|
||||
u64 bytes_readonly,
|
||||
struct btrfs_space_info **space_info);
|
||||
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
|
||||
u64 flags);
|
||||
u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
|
||||
bool may_use_included);
|
||||
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
|
||||
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *info, u64 bytes,
|
||||
int dump_block_groups);
|
||||
int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 orig_bytes,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
|
||||
#endif /* BTRFS_SPACE_INFO_H */
|
@ -42,6 +42,7 @@
|
||||
#include "dev-replace.h"
|
||||
#include "free-space-cache.h"
|
||||
#include "backref.h"
|
||||
#include "space-info.h"
|
||||
#include "tests/btrfs-tests.h"
|
||||
|
||||
#include "qgroup.h"
|
||||
@ -1553,6 +1554,8 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
|
||||
} else {
|
||||
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
|
||||
btrfs_sb(s)->bdev_holder = fs_type;
|
||||
if (!strstr(crc32c_impl(), "generic"))
|
||||
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
|
||||
error = btrfs_fill_super(s, fs_devices, data);
|
||||
}
|
||||
if (!error)
|
||||
@ -1601,14 +1604,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
|
||||
{
|
||||
struct vfsmount *mnt_root;
|
||||
struct dentry *root;
|
||||
fmode_t mode = FMODE_READ;
|
||||
char *subvol_name = NULL;
|
||||
u64 subvol_objectid = 0;
|
||||
int error = 0;
|
||||
|
||||
if (!(flags & SB_RDONLY))
|
||||
mode |= FMODE_WRITE;
|
||||
|
||||
error = btrfs_parse_subvol_options(data, &subvol_name,
|
||||
&subvol_objectid);
|
||||
if (error) {
|
||||
@ -1904,8 +1903,9 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
|
||||
u64 type;
|
||||
u64 avail_space;
|
||||
u64 min_stripe_size;
|
||||
int min_stripes = 1, num_stripes = 1;
|
||||
int min_stripes, num_stripes = 1;
|
||||
int i = 0, nr_devices;
|
||||
const struct btrfs_raid_attr *rattr;
|
||||
|
||||
/*
|
||||
* We aren't under the device list lock, so this is racy-ish, but good
|
||||
@ -1929,21 +1929,18 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
|
||||
|
||||
/* calc min stripe number for data space allocation */
|
||||
type = btrfs_data_alloc_profile(fs_info);
|
||||
if (type & BTRFS_BLOCK_GROUP_RAID0) {
|
||||
min_stripes = 2;
|
||||
num_stripes = nr_devices;
|
||||
} else if (type & BTRFS_BLOCK_GROUP_RAID1) {
|
||||
min_stripes = 2;
|
||||
num_stripes = 2;
|
||||
} else if (type & BTRFS_BLOCK_GROUP_RAID10) {
|
||||
min_stripes = 4;
|
||||
num_stripes = 4;
|
||||
}
|
||||
rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
|
||||
min_stripes = rattr->devs_min;
|
||||
|
||||
if (type & BTRFS_BLOCK_GROUP_DUP)
|
||||
min_stripe_size = 2 * BTRFS_STRIPE_LEN;
|
||||
else
|
||||
min_stripe_size = BTRFS_STRIPE_LEN;
|
||||
if (type & BTRFS_BLOCK_GROUP_RAID0)
|
||||
num_stripes = nr_devices;
|
||||
else if (type & BTRFS_BLOCK_GROUP_RAID1)
|
||||
num_stripes = 2;
|
||||
else if (type & BTRFS_BLOCK_GROUP_RAID10)
|
||||
num_stripes = 4;
|
||||
|
||||
/* Adjust for more than 1 stripe per device */
|
||||
min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
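The stripe-count logic above moves from an if/else chain to a lookup in btrfs_raid_array. A simplified userspace sketch of that table-driven approach; the field names mirror the kernel's raid attributes, but the numeric values are assumptions for illustration:

#include <stdio.h>

enum raid_index { RAID_SINGLE, RAID_RAID0, RAID_RAID1, RAID_RAID10, RAID_DUP };

struct raid_attr {
        const char *name;
        int devs_min;     /* minimum number of devices for the profile */
        int dev_stripes;  /* stripes allocated per device */
};

static const struct raid_attr raid_array[] = {
        [RAID_SINGLE] = { "single", 1, 1 },
        [RAID_RAID0]  = { "raid0",  2, 1 },
        [RAID_RAID1]  = { "raid1",  2, 1 },
        [RAID_RAID10] = { "raid10", 4, 1 },
        [RAID_DUP]    = { "dup",    1, 2 },
};

int main(void)
{
        const struct raid_attr *rattr = &raid_array[RAID_RAID10];

        printf("%s: devs_min=%d, dev_stripes=%d\n",
               rattr->name, rattr->devs_min, rattr->dev_stripes);
        return 0;
}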
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
|
||||
@ -2466,3 +2463,4 @@ late_initcall(init_btrfs_fs);
|
||||
module_exit(exit_btrfs_fs)
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_SOFTDEP("pre: crc32c");
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "transaction.h"
|
||||
#include "sysfs.h"
|
||||
#include "volumes.h"
|
||||
#include "space-info.h"
|
||||
|
||||
static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj);
|
||||
static inline struct btrfs_fs_devices *to_fs_devs(struct kobject *kobj);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "btrfs-tests.h"
|
||||
#include "../ctree.h"
|
||||
#include "../extent_io.h"
|
||||
#include "../btrfs_inode.h"
|
||||
|
||||
#define PROCESS_UNLOCK (1 << 0)
|
||||
#define PROCESS_RELEASE (1 << 1)
|
||||
@ -58,7 +59,7 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
|
||||
static int test_find_delalloc(u32 sectorsize)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct extent_io_tree tmp;
|
||||
struct extent_io_tree *tmp;
|
||||
struct page *page;
|
||||
struct page *locked_page = NULL;
|
||||
unsigned long index = 0;
|
||||
@ -76,12 +77,13 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_std_err(TEST_ALLOC_INODE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
tmp = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
/*
|
||||
* Passing NULL as we don't have fs_info but tracepoints are not used
|
||||
* at this point
|
||||
*/
|
||||
extent_io_tree_init(NULL, &tmp, IO_TREE_SELFTEST, NULL);
|
||||
extent_io_tree_init(NULL, tmp, IO_TREE_SELFTEST, NULL);
|
||||
|
||||
/*
|
||||
* First go through and create and mark all of our pages dirty, we pin
|
||||
@ -108,10 +110,10 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
* |--- delalloc ---|
|
||||
* |--- search ---|
|
||||
*/
|
||||
set_extent_delalloc(&tmp, 0, sectorsize - 1, 0, NULL);
|
||||
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
|
||||
start = 0;
|
||||
end = 0;
|
||||
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
test_err("should have found at least one delalloc");
|
||||
@ -122,7 +124,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
sectorsize - 1, start, end);
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(&tmp, start, end);
|
||||
unlock_extent(tmp, start, end);
|
||||
unlock_page(locked_page);
|
||||
put_page(locked_page);
|
||||
|
||||
@ -139,10 +141,10 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_err("couldn't find the locked page");
|
||||
goto out_bits;
|
||||
}
|
||||
set_extent_delalloc(&tmp, sectorsize, max_bytes - 1, 0, NULL);
|
||||
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
test_err("couldn't find delalloc in our range");
|
||||
@ -158,7 +160,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_err("there were unlocked pages in the range");
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(&tmp, start, end);
|
||||
unlock_extent(tmp, start, end);
|
||||
/* locked_page was unlocked above */
|
||||
put_page(locked_page);
|
||||
|
||||
@ -176,7 +178,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
}
|
||||
start = test_start;
|
||||
end = 0;
|
||||
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (found) {
|
||||
test_err("found range when we shouldn't have");
|
||||
@ -194,10 +196,10 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
*
|
||||
* We are re-using our test_start from above since it works out well.
|
||||
*/
|
||||
set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, 0, NULL);
|
||||
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
test_err("didn't find our range");
|
||||
@ -213,7 +215,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_err("pages in range were not all locked");
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(&tmp, start, end);
|
||||
unlock_extent(tmp, start, end);
|
||||
|
||||
/*
|
||||
* Now to test where we run into a page that is no longer dirty in the
|
||||
@ -238,7 +240,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
* this changes at any point in the future we will need to fix this
|
||||
 * test's expected behavior.
|
||||
*/
|
||||
found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
test_err("didn't find our range");
|
||||
@ -256,7 +258,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
}
|
||||
ret = 0;
|
||||
out_bits:
|
||||
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1);
|
||||
clear_extent_bits(tmp, 0, total_dirty - 1, (unsigned)-1);
|
||||
out:
|
||||
if (locked_page)
|
||||
put_page(locked_page);
|
||||
@ -432,6 +434,89 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_find_first_clear_extent_bit(void)
|
||||
{
|
||||
struct extent_io_tree tree;
|
||||
u64 start, end;
|
||||
|
||||
test_msg("running find_first_clear_extent_bit test");
|
||||
extent_io_tree_init(NULL, &tree, IO_TREE_SELFTEST, NULL);
|
||||
|
||||
/*
|
||||
* Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
|
||||
* 4M-32M
|
||||
*/
|
||||
set_extent_bits(&tree, SZ_1M, SZ_4M - 1,
|
||||
CHUNK_TRIMMED | CHUNK_ALLOCATED);
|
||||
|
||||
find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
|
||||
CHUNK_TRIMMED | CHUNK_ALLOCATED);
|
||||
|
||||
if (start != 0 || end != SZ_1M -1)
|
||||
test_err("error finding beginning range: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
/* Now add 32M-64M so that we have a hole between 4M-32M */
|
||||
set_extent_bits(&tree, SZ_32M, SZ_64M - 1,
|
||||
CHUNK_TRIMMED | CHUNK_ALLOCATED);
|
||||
|
||||
/*
|
||||
* Request first hole starting at 12M, we should get 4M-32M
|
||||
*/
|
||||
find_first_clear_extent_bit(&tree, 12 * SZ_1M, &start, &end,
|
||||
CHUNK_TRIMMED | CHUNK_ALLOCATED);
|
||||
|
||||
if (start != SZ_4M || end != SZ_32M - 1)
|
||||
test_err("error finding trimmed range: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
/*
|
||||
* Search in the middle of allocated range, should get the next one
|
||||
* available, which happens to be unallocated -> 4M-32M
|
||||
*/
|
||||
find_first_clear_extent_bit(&tree, SZ_2M, &start, &end,
|
||||
CHUNK_TRIMMED | CHUNK_ALLOCATED);
|
||||
|
||||
if (start != SZ_4M || end != SZ_32M -1)
|
||||
test_err("error finding next unalloc range: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
/*
|
||||
* Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag
|
||||
* being unset in this range, we should get the entry in range 64M-72M
|
||||
*/
|
||||
set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED);
|
||||
find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
|
||||
CHUNK_TRIMMED);
|
||||
|
||||
if (start != SZ_64M || end != SZ_64M + SZ_8M - 1)
|
||||
test_err("error finding exact range: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
find_first_clear_extent_bit(&tree, SZ_64M - SZ_8M, &start, &end,
|
||||
CHUNK_TRIMMED);
|
||||
|
||||
/*
|
||||
* Search in the middle of set range whose immediate neighbour doesn't
|
||||
* have the bits set so it must be returned
|
||||
*/
|
||||
if (start != SZ_64M || end != SZ_64M + SZ_8M - 1)
|
||||
test_err("error finding next alloc range: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
/*
|
||||
* Search beyond any known range, shall return after last known range
|
||||
* and end should be -1
|
||||
*/
|
||||
find_first_clear_extent_bit(&tree, -1, &start, &end, CHUNK_TRIMMED);
|
||||
if (start != SZ_64M + SZ_8M || end != -1)
|
||||
test_err(
|
||||
"error handling beyond end of range search: start %llu end %llu",
|
||||
start, end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
int ret;
|
||||
@ -442,6 +527,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = test_find_first_clear_extent_bit();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = test_eb_bitmaps(sectorsize, nodesize);
|
||||
out:
|
||||
return ret;
|
||||

@@ -66,7 +66,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 16K)");
goto out;
@@ -85,7 +87,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_32K; /* avoid merging */
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [16K, 20K)");
goto out;
@@ -104,7 +108,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info,
em->len = len;
em->block_start = start;
em->block_len = len;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case1 [%llu %llu]: ret %d", start, start + len, ret);
goto out;
@@ -148,7 +154,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 1K)");
goto out;
@@ -167,7 +175,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
@@ -186,7 +196,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info,
em->len = SZ_1K;
em->block_start = EXTENT_MAP_INLINE;
em->block_len = (u64)-1;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, em->start, em->len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case2 [0 1K]: ret %d", ret);
goto out;
@@ -225,7 +237,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
em->len = SZ_4K;
em->block_start = SZ_4K;
em->block_len = SZ_4K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [4K, 8K)");
goto out;
@@ -244,7 +258,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
em->len = SZ_16K;
em->block_start = 0;
em->block_len = SZ_16K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case3 [0x%llx 0x%llx): ret %d",
start, start + len, ret);
@@ -320,7 +336,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = SZ_8K;
em->block_start = 0;
em->block_len = SZ_8K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [0, 8K)");
goto out;
@@ -339,7 +357,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = 24 * SZ_1K;
em->block_start = SZ_16K; /* avoid merging */
em->block_len = 24 * SZ_1K;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret < 0) {
test_err("cannot add extent range [8K, 32K)");
goto out;
@@ -357,7 +377,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
em->len = SZ_32K;
em->block_start = 0;
em->block_len = SZ_32K;
write_lock(&em_tree->lock);
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, start, len);
write_unlock(&em_tree->lock);
if (ret) {
test_err("case4 [0x%llx 0x%llx): ret %d",
start, len, ret);

@@ -128,6 +128,24 @@ static inline int extwriter_counter_read(struct btrfs_transaction *trans)
return atomic_read(&trans->num_extwriters);
}

/*
* To be called after all the new block groups attached to the transaction
* handle have been created (btrfs_create_pending_block_groups()).
*/
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;

if (!trans->chunk_bytes_reserved)
return;

WARN_ON_ONCE(!list_empty(&trans->new_bgs));

btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
trans->chunk_bytes_reserved);
trans->chunk_bytes_reserved = 0;
}

/*
* either allocate a new transaction or hop into the existing one
*/

@@ -224,5 +224,6 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction);
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);

#endif

@@ -132,6 +132,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
struct btrfs_file_extent_item *fi;
u32 sectorsize = fs_info->sectorsize;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u64 extent_end;

if (!IS_ALIGNED(key->offset, sectorsize)) {
file_extent_err(leaf, slot,
@@ -207,6 +208,16 @@ static int check_extent_data_item(struct extent_buffer *leaf,
CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
return -EUCLEAN;

/* Catch extent end overflow */
if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
key->offset, &extent_end)) {
file_extent_err(leaf, slot,
"extent end overflow, have file offset %llu extent num bytes %llu",
key->offset,
btrfs_file_extent_num_bytes(leaf, fi));
return -EUCLEAN;
}

/*
* Check that no two consecutive file extent items, in the same leaf,
* present ranges that overlap each other.

@@ -3322,6 +3322,30 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
return 0;
}

/*
* Check if an inode was logged in the current transaction. We can't always rely
* on an inode's logged_trans value, because it's an in-memory only field and
* therefore not persisted. This means that its value is lost if the inode gets
* evicted and loaded again from disk (in which case it has a value of 0, and
* certainly it is smaller then any possible transaction ID), when that happens
* the full_sync flag is set in the inode's runtime flags, so on that case we
* assume eviction happened and ignore the logged_trans value, assuming the
* worst case, that the inode was logged before in the current transaction.
*/
static bool inode_logged(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode)
{
if (inode->logged_trans == trans->transid)
return true;

if (inode->last_trans == trans->transid &&
test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
!test_bit(BTRFS_FS_LOG_RECOVERING, &trans->fs_info->flags))
return true;

return false;
}

/*
* If both a file and directory are logged, and unlinks or renames are
* mixed in, we have a few interesting corners:
@@ -3356,7 +3380,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
int bytes_del = 0;
u64 dir_ino = btrfs_ino(dir);

if (dir->logged_trans < trans->transid)
if (!inode_logged(trans, dir))
return 0;

ret = join_running_log_trans(root);
@@ -3460,7 +3484,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
u64 index;
int ret;

if (inode->logged_trans < trans->transid)
if (!inode_logged(trans, inode))
return 0;

ret = join_running_log_trans(root);
@@ -5420,9 +5444,19 @@ log_extents:
}
}

/*
* Don't update last_log_commit if we logged that an inode exists after
* it was loaded to memory (full_sync bit set).
* This is to prevent data loss when we do a write to the inode, then
* the inode gets evicted after all delalloc was flushed, then we log
* it exists (due to a rename for example) and then fsync it. This last
* fsync would do nothing (not logging the extents previously written).
*/
spin_lock(&inode->lock);
inode->logged_trans = trans->transid;
inode->last_log_commit = inode->last_sub_trans;
if (inode_only != LOG_INODE_EXISTS ||
!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
inode->last_log_commit = inode->last_sub_trans;
spin_unlock(&inode->lock);
out_unlock:
mutex_unlock(&inode->log_mutex);

@@ -28,6 +28,7 @@
#include "dev-replace.h"
#include "sysfs.h"
#include "tree-checker.h"
#include "space-info.h"

const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
@@ -123,12 +124,14 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};

const char *get_raid_name(enum btrfs_raid_types type)
const char *btrfs_bg_type_to_raid_name(u64 flags)
{
if (type >= BTRFS_NR_RAID_TYPES)
const int index = btrfs_bg_flags_to_raid_index(flags);

if (index >= BTRFS_NR_RAID_TYPES)
return NULL;

return btrfs_raid_array[type].raid_name;
return btrfs_raid_array[index].raid_name;
}

/*
@@ -237,7 +240,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* chunk_mutex
* -----------
* protects chunks, adding or removing during allocation, trim or when a new
* device is added/removed
* device is added/removed. Additionally it also protects post_commit_list of
* individual devices, since they can be added to the transaction's
* post_commit_list only with chunk_mutex held.
*
* cleaner_mutex
* -------------
@@ -1818,7 +1823,7 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
struct rb_node *n;
u64 ret = 0;

em_tree = &fs_info->mapping_tree.map_tree;
em_tree = &fs_info->mapping_tree;
read_lock(&em_tree->lock);
n = rb_last(&em_tree->map.rb_root);
if (n) {
@@ -2941,7 +2946,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree;
struct extent_map *em;

em_tree = &fs_info->mapping_tree.map_tree;
em_tree = &fs_info->mapping_tree;
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, length);
read_unlock(&em_tree->lock);
@@ -3474,6 +3479,18 @@ static int chunk_devid_filter(struct extent_buffer *leaf,
return 1;
}

static u64 calc_data_stripes(u64 type, int num_stripes)
{
const int index = btrfs_bg_flags_to_raid_index(type);
const int ncopies = btrfs_raid_array[index].ncopies;
const int nparity = btrfs_raid_array[index].nparity;

if (nparity)
return num_stripes - nparity;
else
return num_stripes / ncopies;
}
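
A quick sanity check of the arithmetic in calc_data_stripes() above, as a hedged aside rather than part of the commit; the ncopies/nparity values assumed here are the conventional btrfs_raid_array entries:

/*
 * Sketch of calc_data_stripes() under assumed btrfs_raid_array values:
 *   RAID6, 6 stripes:  nparity = 2 -> 6 - 2 = 4 data stripes
 *   RAID5, 4 stripes:  nparity = 1 -> 4 - 1 = 3 data stripes
 *   RAID1, 2 stripes:  ncopies = 2 -> 2 / 2 = 1 data stripe
 *   RAID0, 3 stripes:  ncopies = 1 -> 3 / 1 = 3 data stripes
 */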

/* [pstart, pend) */
static int chunk_drange_filter(struct extent_buffer *leaf,
struct btrfs_chunk *chunk,
@@ -3483,22 +3500,15 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
u64 stripe_offset;
u64 stripe_length;
u64 type;
int factor;
int i;

if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
return 0;

if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) {
factor = num_stripes / 2;
} else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
factor = num_stripes - 1;
} else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
factor = num_stripes - 2;
} else {
factor = num_stripes;
}
type = btrfs_chunk_type(leaf, chunk);
factor = calc_data_stripes(type, num_stripes);

for (i = 0; i < num_stripes; i++) {
stripe = btrfs_stripe_nr(chunk, i);
@@ -3921,11 +3931,9 @@ static void describe_balance_args(struct btrfs_balance_args *bargs, char *buf,
bp += ret; \
} while (0)

if (flags & BTRFS_BALANCE_ARGS_CONVERT) {
int index = btrfs_bg_flags_to_raid_index(bargs->target);

CHECK_APPEND_1ARG("convert=%s,", get_raid_name(index));
}
if (flags & BTRFS_BALANCE_ARGS_CONVERT)
CHECK_APPEND_1ARG("convert=%s,",
btrfs_bg_type_to_raid_name(bargs->target));

if (flags & BTRFS_BALANCE_ARGS_SOFT)
CHECK_APPEND_NOARG("soft,");
@@ -4047,6 +4055,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
u64 num_devices;
unsigned seq;
bool reducing_integrity;
int i;

if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
@@ -4076,48 +4085,43 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}

num_devices = btrfs_num_devices(fs_info);
allowed = 0;
for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++)
if (num_devices >= btrfs_raid_array[i].devs_min)
allowed |= btrfs_raid_array[i].bg_flag;

allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
if (num_devices > 1)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
if (num_devices > 2)
allowed |= BTRFS_BLOCK_GROUP_RAID5;
if (num_devices > 3)
allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID6);
if (validate_convert_profile(&bctl->data, allowed)) {
int index = btrfs_bg_flags_to_raid_index(bctl->data.target);

btrfs_err(fs_info,
"balance: invalid convert data profile %s",
get_raid_name(index));
btrfs_bg_type_to_raid_name(bctl->data.target));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->meta, allowed)) {
int index = btrfs_bg_flags_to_raid_index(bctl->meta.target);

btrfs_err(fs_info,
"balance: invalid convert metadata profile %s",
get_raid_name(index));
btrfs_bg_type_to_raid_name(bctl->meta.target));
ret = -EINVAL;
goto out;
}
if (validate_convert_profile(&bctl->sys, allowed)) {
int index = btrfs_bg_flags_to_raid_index(bctl->sys.target);

btrfs_err(fs_info,
"balance: invalid convert system profile %s",
get_raid_name(index));
btrfs_bg_type_to_raid_name(bctl->sys.target));
ret = -EINVAL;
goto out;
}

/* allow to reduce meta or sys integrity only if force set */
allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6;
/*
* Allow to reduce metadata or system integrity only if force set for
* profiles with redundancy (copies, parity)
*/
allowed = 0;
for (i = 0; i < ARRAY_SIZE(btrfs_raid_array); i++) {
if (btrfs_raid_array[i].ncopies >= 2 ||
btrfs_raid_array[i].tolerated_failures >= 1)
allowed |= btrfs_raid_array[i].bg_flag;
}
do {
seq = read_seqbegin(&fs_info->profiles_lock);

@@ -4152,12 +4156,18 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,

if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
int meta_index = btrfs_bg_flags_to_raid_index(meta_target);
int data_index = btrfs_bg_flags_to_raid_index(data_target);

btrfs_warn(fs_info,
"balance: metadata profile %s has lower redundancy than data profile %s",
get_raid_name(meta_index), get_raid_name(data_index));
btrfs_bg_type_to_raid_name(meta_target),
btrfs_bg_type_to_raid_name(data_target));
}

if (fs_info->send_in_progress) {
btrfs_warn_rl(fs_info,
"cannot run balance while send operations are in progress (%d in progress)",
fs_info->send_in_progress);
ret = -EAGAIN;
goto out;
}

ret = insert_balance_item(fs_info, bctl);
@@ -4949,6 +4959,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
sub_stripes = btrfs_raid_array[index].sub_stripes;
dev_stripes = btrfs_raid_array[index].dev_stripes;
devs_max = btrfs_raid_array[index].devs_max;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
devs_min = btrfs_raid_array[index].devs_min;
devs_increment = btrfs_raid_array[index].devs_increment;
ncopies = btrfs_raid_array[index].ncopies;
@@ -4957,8 +4969,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (type & BTRFS_BLOCK_GROUP_DATA) {
max_stripe_size = SZ_1G;
max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
@@ -4966,13 +4976,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
else
max_stripe_size = SZ_256M;
max_chunk_size = max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
} else {
btrfs_err(info, "invalid chunk type 0x%llx requested",
type);
@@ -5143,7 +5149,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
em->block_len = em->len;
em->orig_block_len = stripe_size;

em_tree = &info->mapping_tree.map_tree;
em_tree = &info->mapping_tree;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
if (ret) {
@@ -5324,20 +5330,9 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans)

static inline int btrfs_chunk_max_errors(struct map_lookup *map)
{
int max_errors;
const int index = btrfs_bg_flags_to_raid_index(map->type);

if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10 |
BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_DUP)) {
max_errors = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
max_errors = 2;
} else {
max_errors = 0;
}

return max_errors;
return btrfs_raid_array[index].tolerated_failures;
}
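
As a hedged aside, the table-driven rewrite of btrfs_chunk_max_errors() above should be equivalent to the removed if/else chain, assuming the tolerated_failures values conventionally stored in btrfs_raid_array:

/*
 * Assumed equivalence check (not part of the commit):
 *   RAID1 / RAID10 / RAID5 / DUP: tolerated_failures = 1  (old: max_errors = 1)
 *   RAID6:                        tolerated_failures = 2  (old: max_errors = 2)
 *   RAID0 / SINGLE:               tolerated_failures = 0  (old: max_errors = 0)
 */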

int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
@@ -5378,21 +5373,16 @@ end:
return readonly;
}

void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
extent_map_tree_init(&tree->map_tree);
}

void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
void btrfs_mapping_tree_free(struct extent_map_tree *tree)
{
struct extent_map *em;

while (1) {
write_lock(&tree->map_tree.lock);
em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
write_lock(&tree->lock);
em = lookup_extent_mapping(tree, 0, (u64)-1);
if (em)
remove_extent_mapping(&tree->map_tree, em);
write_unlock(&tree->map_tree.lock);
remove_extent_mapping(tree, em);
write_unlock(&tree->lock);
if (!em)
break;
/* once for us */
@@ -5419,7 +5409,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
return 1;

map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1_MASK))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
ret = map->sub_stripes;
@@ -5493,7 +5483,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
struct btrfs_device *srcdev;

ASSERT((map->type &
(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)));
(BTRFS_BLOCK_GROUP_RAID1_MASK | BTRFS_BLOCK_GROUP_RAID10)));

if (map->type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = map->sub_stripes;
@@ -5682,7 +5672,7 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
&remaining_stripes);
div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
last_stripe *= sub_stripes;
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1_MASK |
BTRFS_BLOCK_GROUP_DUP)) {
num_stripes = map->num_stripes;
} else {
@@ -5926,6 +5916,102 @@ static bool need_full_stripe(enum btrfs_map_op op)
return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
}

/*
* btrfs_get_io_geometry - calculates the geomery of a particular (address, len)
* tuple. This information is used to calculate how big a
* particular bio can get before it straddles a stripe.
*
* @fs_info - the filesystem
* @logical - address that we want to figure out the geometry of
* @len - the length of IO we are going to perform, starting at @logical
* @op - type of operation - write or read
* @io_geom - pointer used to return values
*
* Returns < 0 in case a chunk for the given logical address cannot be found,
* usually shouldn't happen unless @logical is corrupted, 0 otherwise.
*/
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 len, struct btrfs_io_geometry *io_geom)
{
struct extent_map *em;
struct map_lookup *map;
u64 offset;
u64 stripe_offset;
u64 stripe_nr;
u64 stripe_len;
u64 raid56_full_stripe_start = (u64)-1;
int data_stripes;

ASSERT(op != BTRFS_MAP_DISCARD);

em = btrfs_get_chunk_map(fs_info, logical, len);
if (IS_ERR(em))
return PTR_ERR(em);

map = em->map_lookup;
/* Offset of this logical address in the chunk */
offset = logical - em->start;
/* Len of a stripe in a chunk */
stripe_len = map->stripe_len;
/* Stripe wher this block falls in */
stripe_nr = div64_u64(offset, stripe_len);
/* Offset of stripe in the chunk */
stripe_offset = stripe_nr * stripe_len;
if (offset < stripe_offset) {
btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
stripe_offset, offset, em->start, logical, stripe_len);
free_extent_map(em);
return -EINVAL;
}

/* stripe_offset is the offset of this block in its stripe */
stripe_offset = offset - stripe_offset;
data_stripes = nr_data_stripes(map);

if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
u64 max_len = stripe_len - stripe_offset;

/*
* In case of raid56, we need to know the stripe aligned start
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len = stripe_len * data_stripes;
raid56_full_stripe_start = offset;

/*
* Allow a write of a full stripe, but make sure we
* don't allow straddling of stripes
*/
raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
full_stripe_len);
raid56_full_stripe_start *= full_stripe_len;

/*
* For writes to RAID[56], allow a full stripeset across
* all disks. For other RAID types and for RAID[56]
* reads, just allow a single stripe (on a single disk).
*/
if (op == BTRFS_MAP_WRITE) {
max_len = stripe_len * data_stripes -
(offset - raid56_full_stripe_start);
}
}
len = min_t(u64, em->len - offset, max_len);
} else {
len = em->len - offset;
}

io_geom->len = len;
io_geom->offset = offset;
io_geom->stripe_len = stripe_len;
io_geom->stripe_nr = stripe_nr;
io_geom->stripe_offset = stripe_offset;
io_geom->raid56_stripe_offset = raid56_full_stripe_start;

return 0;
}

static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
@@ -5939,6 +6025,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
u64 stripe_nr;
u64 stripe_len;
u32 stripe_index;
int data_stripes;
int i;
int ret = 0;
int num_stripes;
@@ -5951,76 +6038,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
int patch_the_first_stripe_for_dev_replace = 0;
u64 physical_to_patch_in_first_stripe = 0;
u64 raid56_full_stripe_start = (u64)-1;
struct btrfs_io_geometry geom;

ASSERT(bbio_ret);

if (op == BTRFS_MAP_DISCARD)
return __btrfs_map_block_for_discard(fs_info, logical,
*length, bbio_ret);

ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
if (ret < 0)
return ret;

em = btrfs_get_chunk_map(fs_info, logical, *length);
if (IS_ERR(em))
return PTR_ERR(em);

ASSERT(em);
map = em->map_lookup;
offset = logical - em->start;

stripe_len = map->stripe_len;
stripe_nr = offset;
/*
* stripe_nr counts the total number of stripes we have to stride
* to get to this block
*/
stripe_nr = div64_u64(stripe_nr, stripe_len);

stripe_offset = stripe_nr * stripe_len;
if (offset < stripe_offset) {
btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu",
stripe_offset, offset, em->start, logical,
stripe_len);
free_extent_map(em);
return -EINVAL;
}

/* stripe_offset is the offset of this block in its stripe*/
stripe_offset = offset - stripe_offset;

/* if we're here for raid56, we need to know the stripe aligned start */
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
raid56_full_stripe_start = offset;

/* allow a write of a full stripe, but make sure we don't
* allow straddling of stripes
*/
raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
full_stripe_len);
raid56_full_stripe_start *= full_stripe_len;
}

if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
u64 max_len;
/* For writes to RAID[56], allow a full stripeset across all disks.
For other RAID types and for RAID[56] reads, just allow a single
stripe (on a single disk). */
if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
(op == BTRFS_MAP_WRITE)) {
max_len = stripe_len * nr_data_stripes(map) -
(offset - raid56_full_stripe_start);
} else {
/* we limit the length of each bio to what fits in a stripe */
max_len = stripe_len - stripe_offset;
}
*length = min_t(u64, em->len - offset, max_len);
} else {
*length = em->len - offset;
}

/*
* This is for when we're called from btrfs_bio_fits_in_stripe and all
* it cares about is the length
*/
if (!bbio_ret)
goto out;
*length = geom.len;
offset = geom.offset;
stripe_len = geom.stripe_len;
stripe_nr = geom.stripe_nr;
stripe_offset = geom.stripe_offset;
raid56_full_stripe_start = geom.raid56_stripe_offset;
data_stripes = nr_data_stripes(map);

down_read(&dev_replace->rwsem);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
@@ -6052,7 +6092,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
&stripe_index);
if (!need_full_stripe(op))
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
if (need_full_stripe(op))
num_stripes = map->num_stripes;
else if (mirror_num)
@@ -6094,7 +6134,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
/* push stripe_nr back to the start of the full stripe */
stripe_nr = div64_u64(raid56_full_stripe_start,
stripe_len * nr_data_stripes(map));
stripe_len * data_stripes);

/* RAID[56] write or recovery. Return all stripes */
num_stripes = map->num_stripes;
@@ -6110,10 +6150,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
* Mirror #3 is RAID6 Q block.
*/
stripe_nr = div_u64_rem(stripe_nr,
nr_data_stripes(map), &stripe_index);
data_stripes, &stripe_index);
if (mirror_num > 1)
stripe_index = nr_data_stripes(map) +
mirror_num - 2;
stripe_index = data_stripes + mirror_num - 2;

/* We distribute the parity blocks across stripes */
div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
@@ -6171,8 +6210,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
div_u64_rem(stripe_nr, num_stripes, &rot);

/* Fill in the logical address of each stripe */
tmp = stripe_nr * nr_data_stripes(map);
for (i = 0; i < nr_data_stripes(map); i++)
tmp = stripe_nr * data_stripes;
for (i = 0; i < data_stripes; i++)
bbio->raid_map[(i+rot) % num_stripes] =
em->start + (tmp + i) * map->stripe_len;

@@ -6687,7 +6726,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
struct btrfs_chunk *chunk)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct map_lookup *map;
struct extent_map *em;
u64 logical;
@@ -6712,9 +6751,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
return ret;
}

read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
read_unlock(&map_tree->map_tree.lock);
read_lock(&map_tree->lock);
em = lookup_extent_mapping(map_tree, logical, 1);
read_unlock(&map_tree->lock);

/* already mapped? */
if (em && em->start <= logical && em->start + em->len > logical) {
@@ -6783,9 +6822,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,

}

write_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em, 0);
write_unlock(&map_tree->map_tree.lock);
write_lock(&map_tree->lock);
ret = add_extent_mapping(map_tree, em, 0);
write_unlock(&map_tree->lock);
if (ret < 0) {
btrfs_err(fs_info,
"failed to add chunk map, start=%llu len=%llu: %d",
@@ -7103,14 +7142,14 @@ out_short_read:
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev)
{
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
u64 next_start = 0;
bool ret = true;

read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1);
read_unlock(&map_tree->map_tree.lock);
read_lock(&map_tree->lock);
em = lookup_extent_mapping(map_tree, 0, (u64)-1);
read_unlock(&map_tree->lock);
/* No chunk at all? Return false anyway */
if (!em) {
ret = false;
@@ -7148,10 +7187,10 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
next_start = extent_map_end(em);
free_extent_map(em);

read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, next_start,
read_lock(&map_tree->lock);
em = lookup_extent_mapping(map_tree, next_start,
(u64)(-1) - next_start);
read_unlock(&map_tree->map_tree.lock);
read_unlock(&map_tree->lock);
}
out:
return ret;
@@ -7600,10 +7639,9 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
*/
int btrfs_bg_type_to_factor(u64 flags)
{
if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID10))
return 2;
return 1;
const int index = btrfs_bg_flags_to_raid_index(flags);

return btrfs_raid_array[index].ncopies;
}

@@ -7612,7 +7650,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
u64 chunk_offset, u64 devid,
u64 physical_offset, u64 physical_len)
{
struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct map_lookup *map;
struct btrfs_device *dev;
@@ -7701,7 +7739,7 @@ out:

static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree;
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct extent_map *em;
struct rb_node *node;
int ret = 0;

@@ -23,6 +23,21 @@ struct btrfs_pending_bios {
struct bio *tail;
};

struct btrfs_io_geometry {
/* remaining bytes before crossing a stripe */
u64 len;
/* offset of logical address in chunk */
u64 offset;
/* length of single IO stripe */
u64 stripe_len;
/* number of stripe where address falls */
u64 stripe_nr;
/* offset of address in stripe */
u64 stripe_offset;
/* offset of raid56 stripe into the chunk */
u64 raid56_stripe_offset;
};
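
As a minimal, hedged sketch of how a caller (for example a btrfs_bio_fits_in_stripe()-style path) might consume this struct through btrfs_get_io_geometry(); the helper name and the fallback on error are assumptions for illustration, not part of this commit:

/* Hypothetical caller sketch, not part of the kernel diff. */
static u64 bytes_before_stripe_boundary(struct btrfs_fs_info *fs_info,
u64 logical, u64 len)
{
	struct btrfs_io_geometry geom;
	int ret;

	/* Ask the chunk layer how far this IO can go before straddling a stripe. */
	ret = btrfs_get_io_geometry(fs_info, BTRFS_MAP_WRITE, logical, len, &geom);
	if (ret < 0)
		return len; /* assumption: skip splitting if the chunk lookup fails */

	/* geom.len is the remaining byte count before the stripe boundary. */
	return min_t(u64, len, geom.len);
}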

/*
* Use sequence counter to get consistent device stat data on
* 32-bit processors.
@@ -43,8 +58,8 @@ struct btrfs_pending_bios {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)

struct btrfs_device {
struct list_head dev_list;
struct list_head dev_alloc_list;
struct list_head dev_list; /* device_list_mutex */
struct list_head dev_alloc_list; /* chunk mutex */
struct list_head post_commit_list; /* chunk mutex */
struct btrfs_fs_devices *fs_devices;
struct btrfs_fs_info *fs_info;
@@ -229,9 +244,14 @@ struct btrfs_fs_devices {
* this mutex lock.
*/
struct mutex device_list_mutex;

/* List of all devices, protected by device_list_mutex */
struct list_head devices;

/* devices not currently being allocated */
/*
* Devices which can satisfy space allocation. Protected by
* chunk_mutex
*/
struct list_head alloc_list;

struct btrfs_fs_devices *seed;
@@ -336,16 +356,16 @@ struct btrfs_device_info {
};

struct btrfs_raid_attr {
int sub_stripes; /* sub_stripes info for map */
int dev_stripes; /* stripes per dev */
int devs_max; /* max devs to use */
int devs_min; /* min devs needed */
int tolerated_failures; /* max tolerated fail devs */
int devs_increment; /* ndevs has to be a multiple of this */
int ncopies; /* how many copies to data has */
int nparity; /* number of stripes worth of bytes to store
u8 sub_stripes; /* sub_stripes info for map */
u8 dev_stripes; /* stripes per dev */
u8 devs_max; /* max devs to use */
u8 devs_min; /* min devs needed */
u8 tolerated_failures; /* max tolerated fail devs */
u8 devs_increment; /* ndevs has to be a multiple of this */
u8 ncopies; /* how many copies to data has */
u8 nparity; /* number of stripes worth of bytes to store
* parity information */
int mindev_error; /* error code if min devs requisite is unmet */
u8 mindev_error; /* error code if min devs requisite is unmet */
const char raid_name[8]; /* name of the raid */
u64 bg_flag; /* block group flag of the raid */
};
@@ -408,13 +428,14 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret);
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
int mirror_num, int async_submit);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
@@ -557,8 +578,6 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
}

const char *get_raid_name(enum btrfs_raid_types type);

void btrfs_commit_device_sizes(struct btrfs_transaction *trans);

struct list_head *btrfs_get_fs_uuids(void);
@@ -568,6 +587,7 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
struct btrfs_device *failing_dev);

int btrfs_bg_type_to_factor(u64 flags);
const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);

#endif

@@ -29,6 +29,7 @@ struct btrfs_qgroup_extent_record;
struct btrfs_qgroup;
struct extent_io_tree;
struct prelim_ref;
struct btrfs_space_info;

TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR);
TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS);
@@ -2091,6 +2092,45 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);

DECLARE_EVENT_CLASS(btrfs__space_info_update,

TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, u64 old, s64 diff),

TP_ARGS(fs_info, sinfo, old, diff),

TP_STRUCT__entry_btrfs(
__field( u64, type )
__field( u64, old )
__field( s64, diff )
),

TP_fast_assign_btrfs(fs_info,
__entry->type = sinfo->flags;
__entry->old = old;
__entry->diff = diff;
),
TP_printk_btrfs("type=%s old=%llu diff=%lld",
__print_flags(__entry->type, "|", BTRFS_GROUP_FLAGS),
__entry->old, __entry->diff)
);

DEFINE_EVENT(btrfs__space_info_update, update_bytes_may_use,

TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, u64 old, s64 diff),

TP_ARGS(fs_info, sinfo, old, diff)
);

DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,

TP_PROTO(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, u64 old, s64 diff),

TP_ARGS(fs_info, sinfo, old, diff)
);

#endif /* _TRACE_BTRFS_H */

/* This part must be outside protection */

@@ -866,6 +866,8 @@ enum btrfs_raid_types {
#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6)

#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1)

/*
* We need a bit for restriper to be able to tell when chunks of type
* SINGLE are available. This "extended" profile format is used in