Btrfs: Update metadata reservation for delayed allocation

Introduce metadata reservation context for delayed allocation
and update various related functions.

This patch also introduces EXTENT_FIRST_DELALLOC control bit for
set/clear_extent_bit. It tells set/clear_bit_hook whether they
are processing the first extent_state with EXTENT_DELALLOC bit
set. This change is important if set/clear_extent_bit involves
multiple extent_state.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Yan, Zheng 2010-05-16 10:48:47 -04:00 committed by Chris Mason
parent a22285a6a3
commit 0ca1f7ceb1
9 changed files with 232 additions and 415 deletions

View File

@ -137,8 +137,8 @@ struct btrfs_inode {
* of extent items we've reserved metadata for.
*/
spinlock_t accounting_lock;
atomic_t outstanding_extents;
int reserved_extents;
int outstanding_extents;
/*
* ordered_data_close is set by truncate when a file that used

View File

@ -2079,19 +2079,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items);
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes);
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes);
int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
int num_items, int *retries);
@ -2099,6 +2088,10 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_pending_snapshot *pending);
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
void btrfs_free_block_rsv(struct btrfs_root *root,

View File

@ -64,12 +64,6 @@ static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 num_bytes);
static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 to_reclaim);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@ -2880,189 +2874,14 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
BTRFS_BLOCK_GROUP_DATA);
}
static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items)
{
u64 num_bytes;
int level;
level = BTRFS_MAX_LEVEL - 2;
/*
* NOTE: these calculations are absolutely the worst possible case.
* This assumes that _every_ item we insert will require a new leaf, and
* that the tree has grown to its maximum level size.
*/
/*
* for every item we insert we could insert both an extent item and a
* extent ref item. Then for ever item we insert, we will need to cow
* both the original leaf, plus the leaf to the left and right of it.
*
* Unless we are talking about the extent root, then we just want the
* number of items * 2, since we just need the extent item plus its ref.
*/
if (root == root->fs_info->extent_root)
num_bytes = num_items * 2;
else
num_bytes = (num_items + (2 * num_items)) * 3;
/*
* num_bytes is total number of leaves we could need times the leaf
* size, and then for every leaf we could end up cow'ing 2 nodes per
* level, down to the leaf level.
*/
num_bytes = (num_bytes * root->leafsize) +
(num_bytes * (level * 2)) * root->nodesize;
return num_bytes;
}
/*
* Unreserve metadata space for delalloc. If we have less reserved credits than
* we have extents, this function does nothing.
*/
int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 num_bytes;
u64 alloc_target;
bool bug = false;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
num_items);
spin_lock(&meta_sinfo->lock);
spin_lock(&BTRFS_I(inode)->accounting_lock);
if (BTRFS_I(inode)->reserved_extents <=
BTRFS_I(inode)->outstanding_extents) {
spin_unlock(&BTRFS_I(inode)->accounting_lock);
spin_unlock(&meta_sinfo->lock);
return 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->reserved_extents -= num_items;
BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
if (meta_sinfo->bytes_delalloc < num_bytes) {
bug = true;
meta_sinfo->bytes_delalloc = 0;
} else {
meta_sinfo->bytes_delalloc -= num_bytes;
}
spin_unlock(&meta_sinfo->lock);
BUG_ON(bug);
return 0;
}
static void check_force_delalloc(struct btrfs_space_info *meta_sinfo)
{
u64 thresh;
thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
meta_sinfo->bytes_super + meta_sinfo->bytes_root +
meta_sinfo->bytes_may_use;
thresh = meta_sinfo->total_bytes - thresh;
thresh *= 80;
do_div(thresh, 100);
if (thresh <= meta_sinfo->bytes_delalloc)
meta_sinfo->force_delalloc = 1;
else
meta_sinfo->force_delalloc = 0;
}
/*
* Reserve metadata space for delalloc.
*/
int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root,
struct inode *inode, int num_items)
{
struct btrfs_fs_info *info = root->fs_info;
struct btrfs_space_info *meta_sinfo;
u64 num_bytes;
u64 used;
u64 alloc_target;
int flushed = 0;
int force_delalloc;
/* get the space info for where the metadata will live */
alloc_target = btrfs_get_alloc_profile(root, 0);
meta_sinfo = __find_space_info(info, alloc_target);
num_bytes = calculate_bytes_needed(root->fs_info->extent_root,
num_items);
again:
spin_lock(&meta_sinfo->lock);
force_delalloc = meta_sinfo->force_delalloc;
if (unlikely(!meta_sinfo->bytes_root))
meta_sinfo->bytes_root = calculate_bytes_needed(root, 6);
if (!flushed)
meta_sinfo->bytes_delalloc += num_bytes;
used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly +
meta_sinfo->bytes_super + meta_sinfo->bytes_root +
meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc;
if (used > meta_sinfo->total_bytes) {
flushed++;
if (flushed == 1) {
if (maybe_allocate_chunk(NULL, root, meta_sinfo,
num_bytes))
goto again;
flushed++;
} else {
spin_unlock(&meta_sinfo->lock);
}
if (flushed == 2) {
filemap_flush(inode->i_mapping);
goto again;
} else if (flushed == 3) {
shrink_delalloc(NULL, root, meta_sinfo, num_bytes);
goto again;
}
spin_lock(&meta_sinfo->lock);
meta_sinfo->bytes_delalloc -= num_bytes;
spin_unlock(&meta_sinfo->lock);
printk(KERN_ERR "enospc, has %d, reserved %d\n",
BTRFS_I(inode)->outstanding_extents,
BTRFS_I(inode)->reserved_extents);
dump_space_info(meta_sinfo, 0, 0);
return -ENOSPC;
}
BTRFS_I(inode)->reserved_extents += num_items;
check_force_delalloc(meta_sinfo);
spin_unlock(&meta_sinfo->lock);
if (!flushed && force_delalloc)
filemap_flush(inode->i_mapping);
return 0;
}
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
*/
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 used;
int ret = 0, committed = 0;
@ -3147,12 +2966,13 @@ alloc:
}
/*
* if there was an error for whatever reason after calling
* btrfs_check_data_free_space, call this so we can cleanup the counters.
* called when we are clearing an delalloc extent from the
* inode's io_tree or there was an error for whatever reason
* after calling btrfs_check_data_free_space
*/
void btrfs_free_reserved_data_space(struct btrfs_root *root,
struct inode *inode, u64 bytes)
void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_space_info *data_sinfo;
/* make sure bytes are sectorsize aligned */
@ -3165,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root,
spin_unlock(&data_sinfo->lock);
}
/* called when we are adding a delalloc extent to the inode's io_tree */
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *data_sinfo;
/* get the space info for where this inode will be storing its data */
data_sinfo = BTRFS_I(inode)->space_info;
/* make sure we have enough space to handle the data first */
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_delalloc += bytes;
/*
* we are adding a delalloc extent without calling
* btrfs_check_data_free_space first. This happens on a weird
* writepage condition, but shouldn't hurt our accounting
*/
if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
BTRFS_I(inode)->reserved_bytes = 0;
} else {
data_sinfo->bytes_may_use -= bytes;
BTRFS_I(inode)->reserved_bytes -= bytes;
}
spin_unlock(&data_sinfo->lock);
}
/* called when we are clearing an delalloc extent from the inode's io_tree */
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
u64 bytes)
{
struct btrfs_space_info *info;
info = BTRFS_I(inode)->space_info;
spin_lock(&info->lock);
info->bytes_delalloc -= bytes;
spin_unlock(&info->lock);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
@ -3331,18 +3109,19 @@ static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_space_info *sinfo, u64 to_reclaim)
struct btrfs_root *root, u64 to_reclaim)
{
struct btrfs_block_rsv *block_rsv;
u64 reserved;
u64 max_reclaim;
u64 reclaimed = 0;
int pause = 1;
int ret;
spin_lock(&sinfo->lock);
reserved = sinfo->bytes_delalloc;
spin_unlock(&sinfo->lock);
block_rsv = &root->fs_info->delalloc_block_rsv;
spin_lock(&block_rsv->lock);
reserved = block_rsv->reserved;
spin_unlock(&block_rsv->lock);
if (reserved == 0)
return 0;
@ -3361,11 +3140,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
pause = 1;
}
spin_lock(&sinfo->lock);
if (reserved > sinfo->bytes_delalloc)
reclaimed = reserved - sinfo->bytes_delalloc;
reserved = sinfo->bytes_delalloc;
spin_unlock(&sinfo->lock);
spin_lock(&block_rsv->lock);
if (reserved > block_rsv->reserved)
reclaimed = reserved - block_rsv->reserved;
reserved = block_rsv->reserved;
spin_unlock(&block_rsv->lock);
if (reserved == 0 || reclaimed >= max_reclaim)
break;
@ -3394,7 +3173,7 @@ static int should_retry_reserve(struct btrfs_trans_handle *trans,
if (trans && trans->transaction->in_commit)
return -ENOSPC;
ret = shrink_delalloc(trans, root, space_info, num_bytes);
ret = shrink_delalloc(trans, root, num_bytes);
if (ret)
return ret;
@ -3754,6 +3533,108 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
{
return num_bytes >>= 3;
}
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve;
int nr_extents;
int retries = 0;
int ret;
if (btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
num_bytes = ALIGN(num_bytes, root->sectorsize);
again:
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
if (nr_extents > BTRFS_I(inode)->reserved_extents) {
nr_extents -= BTRFS_I(inode)->reserved_extents;
to_reserve = calc_trans_metadata_size(root, nr_extents);
} else {
nr_extents = 0;
to_reserve = 0;
}
to_reserve += calc_csum_metadata_size(inode, num_bytes);
ret = reserve_metadata_bytes(block_rsv, to_reserve);
if (ret) {
spin_unlock(&BTRFS_I(inode)->accounting_lock);
ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
&retries);
if (ret > 0)
goto again;
return ret;
}
BTRFS_I(inode)->reserved_extents += nr_extents;
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_unlock(&BTRFS_I(inode)->accounting_lock);
block_rsv_add_bytes(block_rsv, to_reserve, 1);
if (block_rsv->size > 512 * 1024 * 1024)
shrink_delalloc(NULL, root, to_reserve);
return 0;
}
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 to_free;
int nr_extents;
num_bytes = ALIGN(num_bytes, root->sectorsize);
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&BTRFS_I(inode)->accounting_lock);
nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
if (nr_extents < BTRFS_I(inode)->reserved_extents) {
nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
BTRFS_I(inode)->reserved_extents -= nr_extents;
} else {
nr_extents = 0;
}
spin_unlock(&BTRFS_I(inode)->accounting_lock);
to_free = calc_csum_metadata_size(inode, num_bytes);
if (nr_extents > 0)
to_free += calc_trans_metadata_size(root, nr_extents);
btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
to_free);
}
int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
{
int ret;
ret = btrfs_check_data_free_space(inode, num_bytes);
if (ret)
return ret;
ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
if (ret) {
btrfs_free_reserved_data_space(inode, num_bytes);
return ret;
}
return 0;
}
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
btrfs_delalloc_release_metadata(inode, num_bytes);
btrfs_free_reserved_data_space(inode, num_bytes);
}
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc)

View File

@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree,
}
static int set_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->set_bit_hook) {
return tree->ops->set_bit_hook(tree->mapping->host,
state->start, state->end,
state->state, bits);
state, bits);
}
return 0;
}
static void clear_state_cb(struct extent_io_tree *tree,
struct extent_state *state,
unsigned long bits)
struct extent_state *state, int *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
int bits)
int *bits)
{
struct rb_node *node;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
int ret;
if (end < start) {
@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree,
if (ret)
return ret;
if (bits & EXTENT_DIRTY)
if (bits_to_set & EXTENT_DIRTY)
tree->dirty_bytes += end - start + 1;
state->state |= bits;
state->state |= bits_to_set;
node = tree_insert(&tree->state, end, &state->rb_node);
if (node) {
struct extent_state *found;
@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
* struct is freed and removed from the tree
*/
static int clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state, int bits, int wake,
int delete)
struct extent_state *state,
int *bits, int wake)
{
int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING;
int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
WARN_ON(range > tree->dirty_bytes);
tree->dirty_bytes -= range;
@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree,
state->state &= ~bits_to_clear;
if (wake)
wake_up(&state->wq);
if (delete || state->state == 0) {
if (state->state == 0) {
if (state->tree) {
clear_state_cb(tree, state, state->state);
rb_erase(&state->rb_node, &tree->state);
state->tree = NULL;
free_extent_state(state);
@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int set = 0;
int clear = 0;
if (delete)
bits |= ~EXTENT_CTLBITS;
bits |= EXTENT_FIRST_DELALLOC;
if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
clear = 1;
again:
@ -580,8 +581,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
set |= clear_state_bit(tree, state, bits, wake,
delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@ -602,7 +602,7 @@ hit_next:
if (wake)
wake_up(&state->wq);
set |= clear_state_bit(tree, prealloc, bits, wake, delete);
set |= clear_state_bit(tree, prealloc, &bits, wake);
prealloc = NULL;
goto out;
@ -613,7 +613,7 @@ hit_next:
else
next_node = NULL;
set |= clear_state_bit(tree, state, bits, wake, delete);
set |= clear_state_bit(tree, state, &bits, wake);
if (last_end == (u64)-1)
goto out;
start = last_end + 1;
@ -706,19 +706,19 @@ out:
static int set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
int bits)
int *bits)
{
int ret;
int bits_to_set = *bits & ~EXTENT_CTLBITS;
ret = set_state_cb(tree, state, bits);
if (ret)
return ret;
if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
tree->dirty_bytes += range;
}
state->state |= bits;
state->state |= bits_to_set;
return 0;
}
@ -757,6 +757,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
bits |= EXTENT_FIRST_DELALLOC;
again:
if (!prealloc && (mask & __GFP_WAIT)) {
prealloc = alloc_extent_state(mask);
@ -778,7 +779,7 @@ again:
*/
node = tree_search(tree, start);
if (!node) {
err = insert_state(tree, prealloc, start, end, bits);
err = insert_state(tree, prealloc, start, end, &bits);
prealloc = NULL;
BUG_ON(err == -EEXIST);
goto out;
@ -802,7 +803,7 @@ hit_next:
goto out;
}
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
@ -852,7 +853,7 @@ hit_next:
if (err)
goto out;
if (state->end <= end) {
err = set_state_bits(tree, state, bits);
err = set_state_bits(tree, state, &bits);
if (err)
goto out;
cache_state(state, cached_state);
@ -877,7 +878,7 @@ hit_next:
else
this_end = last_start - 1;
err = insert_state(tree, prealloc, start, this_end,
bits);
&bits);
BUG_ON(err == -EEXIST);
if (err) {
prealloc = NULL;
@ -903,7 +904,7 @@ hit_next:
err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST);
err = set_state_bits(tree, prealloc, bits);
err = set_state_bits(tree, prealloc, &bits);
if (err) {
prealloc = NULL;
goto out;
@ -966,8 +967,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0,
NULL, mask);
EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
}
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
@ -1435,9 +1435,6 @@ int extent_clear_unlock_delalloc(struct inode *inode,
if (op & EXTENT_CLEAR_DELALLOC)
clear_bits |= EXTENT_DELALLOC;
if (op & EXTENT_CLEAR_ACCOUNTING)
clear_bits |= EXTENT_DO_ACCOUNTING;
clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |

View File

@ -16,7 +16,9 @@
#define EXTENT_BOUNDARY (1 << 9)
#define EXTENT_NODATASUM (1 << 10)
#define EXTENT_DO_ACCOUNTING (1 << 11)
#define EXTENT_FIRST_DELALLOC (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
/* flags for bio submission */
#define EXTENT_BIO_COMPRESSED 1
@ -69,10 +71,10 @@ struct extent_io_ops {
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*set_bit_hook)(struct inode *inode, struct extent_state *state,
int *bits);
int (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
unsigned long bits);
int *bits);
int (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);

View File

@ -852,13 +852,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* do the reserve before the mutex lock in case we have to do some
* flushing. We wouldn't deadlock, but this is more polite.
*/
err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (err)
goto out_nolock;
mutex_lock(&inode->i_mutex);
current->backing_dev_info = inode->i_mapping->backing_dev_info;
@ -921,7 +914,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
WARN_ON(num_pages > nrptrs);
memset(pages, 0, sizeof(struct page *) * nrptrs);
ret = btrfs_check_data_free_space(root, inode, write_bytes);
ret = btrfs_delalloc_reserve_space(inode, write_bytes);
if (ret)
goto out;
@ -929,26 +922,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
pos, first_index, last_index,
write_bytes);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
ret = btrfs_copy_from_user(pos, num_pages,
write_bytes, pages, buf);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_drop_pages(pages, num_pages);
goto out;
if (ret == 0) {
dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
}
ret = dirty_and_release_pages(NULL, root, file, pages,
num_pages, pos, write_bytes);
btrfs_drop_pages(pages, num_pages);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
write_bytes);
btrfs_delalloc_release_space(inode, write_bytes);
goto out;
}
@ -975,9 +962,7 @@ out:
mutex_unlock(&inode->i_mutex);
if (ret)
err = ret;
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
out_nolock:
kfree(pages);
if (pinned[0])
page_cache_release(pinned[0]);

View File

@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
inline_len, compressed_size,
compressed_pages);
BUG_ON(ret);
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
}
@ -414,6 +415,7 @@ again:
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
/* lets try to make an inline extent */
if (ret || total_in < (actual_end - start)) {
@ -439,7 +441,6 @@ again:
start, end, NULL,
EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
EXTENT_CLEAR_DELALLOC |
EXTENT_CLEAR_ACCOUNTING |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
btrfs_end_transaction(trans, root);
@ -734,6 +735,7 @@ static noinline int cow_file_range(struct inode *inode,
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
actual_end = min_t(u64, isize, end + 1);
@ -753,7 +755,6 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_CLEAR_UNLOCK_PAGE |
EXTENT_CLEAR_UNLOCK |
EXTENT_CLEAR_DELALLOC |
EXTENT_CLEAR_ACCOUNTING |
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
@ -1226,15 +1227,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
}
static int btrfs_split_extent_hook(struct inode *inode,
struct extent_state *orig, u64 split)
struct extent_state *orig, u64 split)
{
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return 0;
spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@ -1252,10 +1251,7 @@ static int btrfs_merge_extent_hook(struct inode *inode,
if (!(other->state & EXTENT_DELALLOC))
return 0;
spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
return 0;
}
@ -1264,8 +1260,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits)
static int btrfs_set_bit_hook(struct inode *inode,
struct extent_state *state, int *bits)
{
/*
@ -1273,17 +1269,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
spin_lock(&BTRFS_I(inode)->accounting_lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_delalloc_reserve_space(root, inode, end - start + 1);
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
else
atomic_inc(&BTRFS_I(inode)->outstanding_extents);
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += end - start + 1;
root->fs_info->delalloc_bytes += end - start + 1;
BTRFS_I(inode)->delalloc_bytes += len;
root->fs_info->delalloc_bytes += len;
if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
&root->fs_info->delalloc_inodes);
@ -1297,45 +1294,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
static int btrfs_clear_bit_hook(struct inode *inode,
struct extent_state *state, unsigned long bits)
struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
* but in this case, we are only testeing for the DELALLOC
* bit, which is only set or cleared with irqs on
*/
if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
if (bits & EXTENT_DO_ACCOUNTING) {
spin_lock(&BTRFS_I(inode)->accounting_lock);
WARN_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
}
if (*bits & EXTENT_FIRST_DELALLOC)
*bits &= ~EXTENT_FIRST_DELALLOC;
else if (!(*bits & EXTENT_DO_ACCOUNTING))
atomic_dec(&BTRFS_I(inode)->outstanding_extents);
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
btrfs_free_reserved_data_space(inode, len);
spin_lock(&root->fs_info->delalloc_lock);
if (state->end - state->start + 1 >
root->fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs warning: delalloc account "
"%llu %llu\n",
(unsigned long long)
state->end - state->start + 1,
(unsigned long long)
root->fs_info->delalloc_bytes);
btrfs_delalloc_free_space(root, inode, (u64)-1);
root->fs_info->delalloc_bytes = 0;
BTRFS_I(inode)->delalloc_bytes = 0;
} else {
btrfs_delalloc_free_space(root, inode,
state->end -
state->start + 1);
root->fs_info->delalloc_bytes -= state->end -
state->start + 1;
BTRFS_I(inode)->delalloc_bytes -= state->end -
state->start + 1;
}
root->fs_info->delalloc_bytes -= len;
BTRFS_I(inode)->delalloc_bytes -= len;
if (BTRFS_I(inode)->delalloc_bytes == 0 &&
!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@ -1520,6 +1504,7 @@ again:
goto again;
}
BUG();
btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
ClearPageChecked(page);
out:
@ -1650,7 +1635,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
@ -1668,9 +1653,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
if (!ret) {
trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
}
goto out;
}
@ -1680,6 +1666,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
0, &cached_state, GFP_NOFS);
trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compressed = 1;
@ -1711,12 +1699,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
/* this also removes the ordered extent from the tree */
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
btrfs_end_transaction(trans, root);
out:
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
if (trans)
btrfs_end_transaction(trans, root);
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@ -3219,11 +3208,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
if ((offset & (blocksize - 1)) == 0)
goto out;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto out;
@ -3231,8 +3216,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
again:
page = grab_cache_page(mapping, index);
if (!page) {
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
}
@ -3295,8 +3279,7 @@ again:
out_unlock:
if (ret)
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
unlock_page(page);
page_cache_release(page);
out:
@ -4878,6 +4861,7 @@ again:
}
flush_dcache_page(page);
} else if (create && PageUptodate(page)) {
WARN_ON(1);
if (!trans) {
kunmap(page);
free_extent_map(em);
@ -5142,7 +5126,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
@ -5151,13 +5135,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out;
}
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (ret) {
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
ret = VM_FAULT_SIGBUS;
goto out;
}
ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
again:
lock_page(page);
@ -5167,7 +5144,6 @@ again:
if ((page->mapping != inode->i_mapping) ||
(page_start >= size)) {
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
/* page got truncated out from underneath us */
goto out_unlock;
}
@ -5208,7 +5184,6 @@ again:
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
ret = VM_FAULT_SIGBUS;
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
goto out_unlock;
}
ret = 0;
@ -5235,10 +5210,10 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
out_unlock:
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
if (!ret)
return VM_FAULT_LOCKED;
unlock_page(page);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out:
return ret;
}
@ -5383,7 +5358,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_unlink_trans = 0;
spin_lock_init(&ei->accounting_lock);
ei->outstanding_extents = 0;
atomic_set(&ei->outstanding_extents, 0);
ei->reserved_extents = 0;
ei->ordered_data_close = 0;
@ -5411,6 +5386,8 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
WARN_ON(BTRFS_I(inode)->reserved_extents);
/*
* This can happen where we create an inode, but somebody else also
@ -5970,8 +5947,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
goto out;
}
ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
alloc_end - alloc_start);
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
if (ret)
goto out;
@ -6037,8 +6013,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_NOFS);
btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
alloc_end - alloc_start);
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;

View File

@ -587,19 +587,9 @@ static int btrfs_defrag_file(struct file *file,
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = 1;
ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
if (ret) {
ret = -ENOSPC;
break;
}
ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
if (ret) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
ret = -ENOSPC;
break;
}
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (ret)
goto err_unlock;
again:
if (inode->i_size == 0 ||
i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
@ -608,8 +598,10 @@ again:
}
page = grab_cache_page(inode->i_mapping, i);
if (!page)
if (!page) {
ret = -ENOMEM;
goto err_reservations;
}
if (!PageUptodate(page)) {
btrfs_readpage(NULL, page);
@ -617,6 +609,7 @@ again:
if (!PageUptodate(page)) {
unlock_page(page);
page_cache_release(page);
ret = -EIO;
goto err_reservations;
}
}
@ -630,8 +623,7 @@ again:
wait_on_page_writeback(page);
if (PageDirty(page)) {
btrfs_free_reserved_data_space(root, inode,
PAGE_CACHE_SIZE);
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto loop_unlock;
}
@ -669,7 +661,6 @@ loop_unlock:
page_cache_release(page);
mutex_unlock(&inode->i_mutex);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
i++;
}
@ -699,9 +690,9 @@ loop_unlock:
return 0;
err_reservations:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
err_unlock:
mutex_unlock(&inode->i_mutex);
btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
return ret;
}

View File

@ -311,13 +311,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
tree->last = NULL;
set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
spin_lock(&BTRFS_I(inode)->accounting_lock);
WARN_ON(!BTRFS_I(inode)->outstanding_extents);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->accounting_lock);
btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
inode, 1);
spin_lock(&root->fs_info->ordered_extent_lock);
list_del_init(&entry->root_extent_list);