From f87f057b49ee52cf5c627ab27a706e3252767c9f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 1 Aug 2008 11:27:23 -0400 Subject: [PATCH] Btrfs: Improve and cleanup locking done by walk_down_tree While dropping snapshots, walk_down_tree does most of the work of checking reference counts and limiting tree traversal to just the blocks that we are freeing. It dropped and held the allocation mutex in strange and confusing ways, this commit changes it to only hold the mutex while actually freeing a block. The rest of the checks around reference counts should be safe without the lock because we only allow one process in btrfs_drop_snapshot at a time. Other processes dropping reference counts should not drop it to 1 because their tree roots already have an extra ref on the block. Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 85 ++++++++++++++++++++++++++++-------------- fs/btrfs/file.c | 13 ++++--- fs/btrfs/ioctl.c | 6 +++ 3 files changed, 70 insertions(+), 34 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b003b4364ddb..58bceeeda9c5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2333,8 +2333,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); - mutex_unlock(&root->fs_info->alloc_mutex); - for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); @@ -2362,8 +2360,6 @@ static int noinline drop_leaf_ref_no_cache(struct btrfs_trans_handle *trans, mutex_unlock(&root->fs_info->alloc_mutex); BUG_ON(ret); } - - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2375,7 +2371,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, int ret; struct btrfs_extent_info *info = ref->extents; - mutex_unlock(&root->fs_info->alloc_mutex); for (i = 0; i < ref->nritems; i++) { mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, @@ -2386,7 +2381,6 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans, BUG_ON(ret); info++; } - mutex_lock(&root->fs_info->alloc_mutex); return 0; } @@ -2440,10 +2434,39 @@ int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len, u32 *refs) { int ret; - mutex_unlock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + +#if 0 // some debugging code in case we see problems here + /* if the refs count is one, it won't get increased again. But + * if the ref count is > 1, someone may be decreasing it at + * the same time we are. + */ + if (*refs != 1) { + struct extent_buffer *eb = NULL; + eb = btrfs_find_create_tree_block(root, start, len); + if (eb) + btrfs_tree_lock(eb); + + mutex_lock(&root->fs_info->alloc_mutex); + ret = lookup_extent_ref(NULL, root, start, len, refs); + BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); + + if (eb) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + if (*refs == 1) { + printk("block %llu went down to one during drop_snap\n", + (unsigned long long)start); + } + + } +#endif + cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); return ret; } @@ -2467,8 +2490,6 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, int ret; u32 refs; - mutex_lock(&root->fs_info->alloc_mutex); - WARN_ON(*level < 0); WARN_ON(*level >= BTRFS_MAX_LEVEL); ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, @@ -2507,13 +2528,21 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); path->slots[*level]++; + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); BUG_ON(ret); + mutex_unlock(&root->fs_info->alloc_mutex); continue; } - + /* + * at this point, we have a single ref, and since the + * only place referencing this extent is a dead root + * the reference count should never go higher. + * So, we don't need to check it again + */ if (*level == 1) { struct btrfs_key key; btrfs_node_key_to_cpu(cur, &key, path->slots[*level]); @@ -2533,33 +2562,23 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { free_extent_buffer(next); - mutex_unlock(&root->fs_info->alloc_mutex); if (path->slots[*level] == 0) reada_walk_down(root, cur, path->slots[*level]); next = read_tree_block(root, bytenr, blocksize, ptr_gen); cond_resched(); - mutex_lock(&root->fs_info->alloc_mutex); - - /* we've dropped the lock, double check */ +#if 0 + /* + * this is a debugging check and can go away + * the ref should never go all the way down to 1 + * at this point + */ ret = lookup_extent_ref(NULL, root, bytenr, blocksize, &refs); BUG_ON(ret); - if (refs != 1) { - parent = path->nodes[*level]; - root_owner = btrfs_header_owner(parent); - root_gen = btrfs_header_generation(parent); - - path->slots[*level]++; - free_extent_buffer(next); - ret = __btrfs_free_extent(trans, root, bytenr, - blocksize, - root_owner, - root_gen, 0, 0, 1); - BUG_ON(ret); - continue; - } + WARN_ON(refs != 1); +#endif } WARN_ON(*level <= 0); if (path->nodes[*level-1]) @@ -2584,6 +2603,8 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); + + mutex_lock(&root->fs_info->alloc_mutex); ret = __btrfs_free_extent(trans, root, bytenr, blocksize, root_owner, root_gen, 0, 0, 1); free_extent_buffer(path->nodes[*level]); @@ -2591,6 +2612,7 @@ out: *level += 1; BUG_ON(ret); mutex_unlock(&root->fs_info->alloc_mutex); + cond_resched(); return 0; } @@ -2834,6 +2856,11 @@ again: } set_page_extent_mapped(page); + /* + * make sure page_mkwrite is called for this page if userland + * wants to change it from mmap + */ + clear_page_dirty_for_io(page); set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c78f184ee5cc..8915f2dc1bce 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -338,6 +338,13 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans, btrfs_drop_extent_cache(inode, start_pos, aligned_end - 1); BUG_ON(err); mutex_unlock(&BTRFS_I(inode)->extent_mutex); + + /* + * an ugly way to do all the prop accounting around + * the page bits and mapping tags + */ + set_page_writeback(pages[0]); + end_page_writeback(pages[0]); did_inline = 1; } if (end_pos > isize) { @@ -833,11 +840,7 @@ again: start_pos, last_pos - 1, GFP_NOFS); } for (i = 0; i < num_pages; i++) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - ClearPageDirty(pages[i]); -#else - cancel_dirty_page(pages[i], PAGE_CACHE_SIZE); -#endif + clear_page_dirty_for_io(pages[i]); set_page_extent_mapped(pages[i]); WARN_ON(!PageLocked(pages[i])); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5e627746c4e8..224da287b3ed 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -268,6 +268,12 @@ again: } set_page_extent_mapped(page); + /* + * this makes sure page_mkwrite is called on the + * page if it is dirtied again later + */ + clear_page_dirty_for_io(page); + set_extent_delalloc(io_tree, page_start, page_end, GFP_NOFS);