Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fix up some long-standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others.

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6.  It's not clear yet if this is just made easier to
  trigger with this pull or if it's a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
This commit is contained in:
Linus Torvalds
2015-02-19 14:36:00 -08:00
34 changed files with 1065 additions and 863 deletions

View File

@@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
static void btrfs_split_extent_hook(struct inode *inode,
struct extent_state *orig, u64 split)
{
u64 size;
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
return;
size = orig->end - orig->start + 1;
if (size > BTRFS_MAX_EXTENT_SIZE) {
u64 num_extents;
u64 new_size;
/*
* We need the largest size of the remaining extent to see if we
* need to add a new outstanding extent. Think of the following
* case
*
* [MAX_EXTENT_SIZEx2 - 4k][4k]
*
* The new_size would just be 4k and we'd think we had enough
* outstanding extents for this if we only took one side of the
* split, same goes for the other direction. We need to see if
* the larger size still is the same amount of extents as the
* original size, because if it is we need to add a new
* outstanding extent. But if we split up and the larger size
* is less than the original then we are good to go since we've
* already accounted for the extra extent in our original
* accounting.
*/
new_size = orig->end - split + 1;
if ((split - orig->start) > new_size)
new_size = split - orig->start;
num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) < num_extents)
return;
}
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
@@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode,
struct extent_state *new,
struct extent_state *other)
{
u64 new_size, old_size;
u64 num_extents;
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
return;
old_size = other->end - other->start + 1;
new_size = old_size + (new->end - new->start + 1);
/* we're not bigger than the max, unreserve the space and go */
if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
return;
}
/*
* If we grew by another max_extent, just return, we want to keep that
* reserved amount.
*/
num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE) > num_extents)
return;
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
spin_unlock(&BTRFS_I(inode)->lock);
@@ -1604,7 +1663,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
* have pending delalloc work to be done.
*/
static void btrfs_set_bit_hook(struct inode *inode,
struct extent_state *state, unsigned long *bits)
struct extent_state *state, unsigned *bits)
{
if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC))
@@ -1645,9 +1704,11 @@ static void btrfs_set_bit_hook(struct inode *inode,
*/
static void btrfs_clear_bit_hook(struct inode *inode,
struct extent_state *state,
unsigned long *bits)
unsigned *bits)
{
u64 len = state->end + 1 - state->start;
u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1,
BTRFS_MAX_EXTENT_SIZE);
spin_lock(&BTRFS_I(inode)->lock);
if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
@@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
*bits &= ~EXTENT_FIRST_DELALLOC;
} else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents--;
BTRFS_I(inode)->outstanding_extents -= num_extents;
spin_unlock(&BTRFS_I(inode)->lock);
}
@@ -2945,7 +3006,7 @@ static int __readpage_endio_check(struct inode *inode,
return 0;
zeroit:
if (__ratelimit(&_rs))
btrfs_info(BTRFS_I(inode)->root->fs_info,
btrfs_warn(BTRFS_I(inode)->root->fs_info,
"csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum, csum_expected);
memset(kaddr + pgoff, 1, len);
@@ -3407,7 +3468,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
out:
if (ret)
btrfs_crit(root->fs_info,
btrfs_err(root->fs_info,
"could not do orphan cleanup %d", ret);
btrfs_free_path(path);
return ret;
@@ -3490,7 +3551,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_inode_item *inode_item;
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
unsigned long ptr;
@@ -3527,17 +3587,19 @@ static void btrfs_read_locked_inode(struct inode *inode)
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
tspec = btrfs_inode_atime(inode_item);
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
tspec = btrfs_inode_mtime(inode_item);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
tspec = btrfs_inode_ctime(inode_item);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_timespec_sec(leaf, &inode_item->otime);
BTRFS_I(inode)->i_otime.tv_nsec =
btrfs_timespec_nsec(leaf, &inode_item->otime);
inode_set_bytes(inode, btrfs_inode_nbytes(leaf, inode_item));
BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item);
@@ -3656,21 +3718,26 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_sec(leaf, &item->atime,
inode->i_atime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
btrfs_set_token_timespec_nsec(leaf, &item->atime,
inode->i_atime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_sec(leaf, &item->mtime,
inode->i_mtime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
btrfs_set_token_timespec_nsec(leaf, &item->mtime,
inode->i_mtime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_sec(leaf, &item->ctime,
inode->i_ctime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
btrfs_set_token_timespec_nsec(leaf, &item->ctime,
inode->i_ctime.tv_nsec, &token);
btrfs_set_token_timespec_sec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_sec, &token);
btrfs_set_token_timespec_nsec(leaf, &item->otime,
BTRFS_I(inode)->i_otime.tv_nsec, &token);
btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
&token);
btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
@@ -5007,6 +5074,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
struct btrfs_root *new_root;
struct btrfs_root_ref *ref;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;
int err = 0;
@@ -5017,9 +5085,12 @@ static int fixup_tree_root_location(struct btrfs_root *root,
}
err = -ENOENT;
ret = btrfs_find_item(root->fs_info->tree_root, path,
BTRFS_I(dir)->root->root_key.objectid,
location->objectid, BTRFS_ROOT_REF_KEY, NULL);
key.objectid = BTRFS_I(dir)->root->root_key.objectid;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = location->objectid;
ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, path,
0, 0);
if (ret) {
if (ret < 0)
err = ret;
@@ -5258,7 +5329,10 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
return inode;
}
@@ -5826,7 +5900,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
@@ -7134,11 +7213,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 start = iblock << inode->i_blkbits;
u64 lockstart, lockend;
u64 len = bh_result->b_size;
u64 orig_len = len;
int unlock_bits = EXTENT_LOCKED;
int ret = 0;
if (create)
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
unlock_bits |= EXTENT_DIRTY;
else
len = min_t(u64, len, root->sectorsize);
@@ -7269,14 +7349,12 @@ unlock:
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockstart + len - 1, EXTENT_DELALLOC, NULL,
&cached_state, GFP_NOFS);
BUG_ON(ret);
if (len < orig_len) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
btrfs_free_reserved_data_space(inode, len);
}
/*
@@ -7805,8 +7883,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
}
/* async crcs make it difficult to collect full stripe writes. */
if (btrfs_get_alloc_profile(root, 1) &
(BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
if (btrfs_get_alloc_profile(root, 1) & BTRFS_BLOCK_GROUP_RAID56_MASK)
async_submit = 0;
else
async_submit = 1;
@@ -8053,8 +8130,6 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode,
count - (size_t)ret);
else
btrfs_delalloc_release_metadata(inode, 0);
}
out:
if (wakeup)
@@ -8575,6 +8650,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->delayed_node = NULL;
ei->i_otime.tv_sec = 0;
ei->i_otime.tv_nsec = 0;
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(&ei->io_tree, &inode->i_data);