forked from Minki/linux
d7df025eb4
Pull btrfs fixes from Chris Mason: "It turns out that we had two crc bugs when running fsx-linux in a loop. Many thanks to Josef, Miao Xie, and Dave Sterba for nailing it all down. Miao also has a new OOM fix in this v2 pull as well. Ilya fixed a regression Liu Bo found in the balance ioctls for pausing and resuming a running balance across drives. Josef's orphan truncate patch fixes an obscure corruption we'd see during xfstests. Arne's patches address problems with subvolume quotas. If the user destroys quota groups incorrectly the FS will refuse to mount. The rest are smaller fixes and plugs for memory leaks." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (30 commits) Btrfs: fix repeated delalloc work allocation Btrfs: fix wrong max device number for single profile Btrfs: fix missed transaction->aborted check Btrfs: Add ACCESS_ONCE() to transaction->abort accesses Btrfs: put csums on the right ordered extent Btrfs: use right range to find checksum for compressed extents Btrfs: fix panic when recovering tree log Btrfs: do not allow logged extents to be merged or removed Btrfs: fix a regression in balance usage filter Btrfs: prevent qgroup destroy when there are still relations Btrfs: ignore orphan qgroup relations Btrfs: reorder locks and sanity checks in btrfs_ioctl_defrag Btrfs: fix unlock order in btrfs_ioctl_rm_dev Btrfs: fix unlock order in btrfs_ioctl_resize Btrfs: fix "mutually exclusive op is running" error code Btrfs: bring back balance pause/resume logic btrfs: update timestamps on truncate() btrfs: fix btrfs_cont_expand() freeing IS_ERR em Btrfs: fix a bug when llseek for delalloc bytes behind prealloc extents Btrfs: fix off-by-one in lseek ...
421 lines
10 KiB
C
421 lines
10 KiB
C
#include <linux/err.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/module.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/hardirq.h>
|
|
#include "ctree.h"
|
|
#include "extent_map.h"
|
|
|
|
|
|
/* Slab cache that every struct extent_map in this file is allocated from. */
static struct kmem_cache *extent_map_cache;
|
|
|
|
/*
 * Create the "btrfs_extent_map" slab cache used for struct extent_map
 * objects and publish it in extent_map_cache.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */
int __init extent_map_init(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("btrfs_extent_map",
				  sizeof(struct extent_map), 0,
				  SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
				  NULL);
	extent_map_cache = cache;

	return cache ? 0 : -ENOMEM;
}
|
|
|
|
void extent_map_exit(void)
|
|
{
|
|
if (extent_map_cache)
|
|
kmem_cache_destroy(extent_map_cache);
|
|
}
|
|
|
|
/**
|
|
* extent_map_tree_init - initialize extent map tree
|
|
* @tree: tree to initialize
|
|
*
|
|
* Initialize the extent tree @tree. Should be called for each new inode
|
|
* or other user of the extent_map interface.
|
|
*/
|
|
void extent_map_tree_init(struct extent_map_tree *tree)
|
|
{
|
|
tree->map = RB_ROOT;
|
|
INIT_LIST_HEAD(&tree->modified_extents);
|
|
rwlock_init(&tree->lock);
|
|
}
|
|
|
|
/**
|
|
* alloc_extent_map - allocate new extent map structure
|
|
*
|
|
* Allocate a new extent_map structure. The new structure is
|
|
* returned with a reference count of one and needs to be
|
|
* freed using free_extent_map()
|
|
*/
|
|
struct extent_map *alloc_extent_map(void)
|
|
{
|
|
struct extent_map *em;
|
|
em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
|
|
if (!em)
|
|
return NULL;
|
|
em->in_tree = 0;
|
|
em->flags = 0;
|
|
em->compress_type = BTRFS_COMPRESS_NONE;
|
|
em->generation = 0;
|
|
atomic_set(&em->refs, 1);
|
|
INIT_LIST_HEAD(&em->list);
|
|
return em;
|
|
}
|
|
|
|
/**
|
|
* free_extent_map - drop reference count of an extent_map
|
|
* @em: extent map beeing releasead
|
|
*
|
|
* Drops the reference out on @em by one and free the structure
|
|
* if the reference count hits zero.
|
|
*/
|
|
void free_extent_map(struct extent_map *em)
|
|
{
|
|
if (!em)
|
|
return;
|
|
WARN_ON(atomic_read(&em->refs) == 0);
|
|
if (atomic_dec_and_test(&em->refs)) {
|
|
WARN_ON(em->in_tree);
|
|
WARN_ON(!list_empty(&em->list));
|
|
kmem_cache_free(extent_map_cache, em);
|
|
}
|
|
}
|
|
|
|
/*
 * Link @node into the rbtree at @root, keyed by @offset.
 *
 * Walks down from the root comparing @offset with each extent's
 * [start, end) range.  If @offset falls inside an existing extent, nothing
 * is inserted and that extent's rb_node is returned.  Otherwise @node is
 * linked at the leaf position found, its extent_map is marked in_tree, the
 * tree is rebalanced and NULL is returned.
 */
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **link = &root->rb_node;
	struct rb_node *above = NULL;
	struct extent_map *cur;

	while (*link) {
		above = *link;
		cur = rb_entry(above, struct extent_map, rb_node);

		WARN_ON(!cur->in_tree);

		if (offset < cur->start)
			link = &above->rb_left;
		else if (offset < extent_map_end(cur))
			return above;	/* offset lies inside this extent */
		else
			link = &above->rb_right;
	}

	cur = rb_entry(node, struct extent_map, rb_node);
	cur->in_tree = 1;
	rb_link_node(node, above, link);
	rb_insert_color(node, root);

	return NULL;
}
|
|
|
|
/*
|
|
* search through the tree for an extent_map with a given offset. If
|
|
* it can't be found, try to find some neighboring extents
|
|
*/
|
|
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
|
struct rb_node **prev_ret,
|
|
struct rb_node **next_ret)
|
|
{
|
|
struct rb_node *n = root->rb_node;
|
|
struct rb_node *prev = NULL;
|
|
struct rb_node *orig_prev = NULL;
|
|
struct extent_map *entry;
|
|
struct extent_map *prev_entry = NULL;
|
|
|
|
while (n) {
|
|
entry = rb_entry(n, struct extent_map, rb_node);
|
|
prev = n;
|
|
prev_entry = entry;
|
|
|
|
WARN_ON(!entry->in_tree);
|
|
|
|
if (offset < entry->start)
|
|
n = n->rb_left;
|
|
else if (offset >= extent_map_end(entry))
|
|
n = n->rb_right;
|
|
else
|
|
return n;
|
|
}
|
|
|
|
if (prev_ret) {
|
|
orig_prev = prev;
|
|
while (prev && offset >= extent_map_end(prev_entry)) {
|
|
prev = rb_next(prev);
|
|
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
|
}
|
|
*prev_ret = prev;
|
|
prev = orig_prev;
|
|
}
|
|
|
|
if (next_ret) {
|
|
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
|
while (prev && offset < prev_entry->start) {
|
|
prev = rb_prev(prev);
|
|
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
|
}
|
|
*next_ret = prev;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* check to see if two extent_map structs are adjacent and safe to merge */
|
|
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
|
|
{
|
|
if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
|
|
return 0;
|
|
|
|
/*
|
|
* don't merge compressed extents, we need to know their
|
|
* actual size
|
|
*/
|
|
if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
|
|
return 0;
|
|
|
|
if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
|
|
test_bit(EXTENT_FLAG_LOGGING, &next->flags))
|
|
return 0;
|
|
|
|
if (extent_map_end(prev) == next->start &&
|
|
prev->flags == next->flags &&
|
|
prev->bdev == next->bdev &&
|
|
((next->block_start == EXTENT_MAP_HOLE &&
|
|
prev->block_start == EXTENT_MAP_HOLE) ||
|
|
(next->block_start == EXTENT_MAP_INLINE &&
|
|
prev->block_start == EXTENT_MAP_INLINE) ||
|
|
(next->block_start == EXTENT_MAP_DELALLOC &&
|
|
prev->block_start == EXTENT_MAP_DELALLOC) ||
|
|
(next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
|
|
next->block_start == extent_map_block_end(prev)))) {
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
|
|
{
|
|
struct extent_map *merge = NULL;
|
|
struct rb_node *rb;
|
|
|
|
if (em->start != 0) {
|
|
rb = rb_prev(&em->rb_node);
|
|
if (rb)
|
|
merge = rb_entry(rb, struct extent_map, rb_node);
|
|
if (rb && mergable_maps(merge, em)) {
|
|
em->start = merge->start;
|
|
em->orig_start = merge->orig_start;
|
|
em->len += merge->len;
|
|
em->block_len += merge->block_len;
|
|
em->block_start = merge->block_start;
|
|
merge->in_tree = 0;
|
|
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
|
|
em->mod_start = merge->mod_start;
|
|
em->generation = max(em->generation, merge->generation);
|
|
list_move(&em->list, &tree->modified_extents);
|
|
|
|
list_del_init(&merge->list);
|
|
rb_erase(&merge->rb_node, &tree->map);
|
|
free_extent_map(merge);
|
|
}
|
|
}
|
|
|
|
rb = rb_next(&em->rb_node);
|
|
if (rb)
|
|
merge = rb_entry(rb, struct extent_map, rb_node);
|
|
if (rb && mergable_maps(em, merge)) {
|
|
em->len += merge->len;
|
|
em->block_len += merge->len;
|
|
rb_erase(&merge->rb_node, &tree->map);
|
|
merge->in_tree = 0;
|
|
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
|
|
em->generation = max(em->generation, merge->generation);
|
|
list_del_init(&merge->list);
|
|
free_extent_map(merge);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* unpin_extent_cache - unpin an extent from the cache
|
|
* @tree: tree to unpin the extent in
|
|
* @start: logical offset in the file
|
|
* @len: length of the extent
|
|
* @gen: generation that this extent has been modified in
|
|
*
|
|
* Called after an extent has been written to disk properly. Set the generation
|
|
* to the generation that actually added the file item to the inode so we know
|
|
* we need to sync this extent when we call fsync().
|
|
*/
|
|
int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
|
|
u64 gen)
|
|
{
|
|
int ret = 0;
|
|
struct extent_map *em;
|
|
bool prealloc = false;
|
|
|
|
write_lock(&tree->lock);
|
|
em = lookup_extent_mapping(tree, start, len);
|
|
|
|
WARN_ON(!em || em->start != start);
|
|
|
|
if (!em)
|
|
goto out;
|
|
|
|
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
|
|
list_move(&em->list, &tree->modified_extents);
|
|
em->generation = gen;
|
|
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
|
|
em->mod_start = em->start;
|
|
em->mod_len = em->len;
|
|
|
|
if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
|
|
prealloc = true;
|
|
clear_bit(EXTENT_FLAG_FILLING, &em->flags);
|
|
}
|
|
|
|
try_merge_map(tree, em);
|
|
|
|
if (prealloc) {
|
|
em->mod_start = em->start;
|
|
em->mod_len = em->len;
|
|
}
|
|
|
|
free_extent_map(em);
|
|
out:
|
|
write_unlock(&tree->lock);
|
|
return ret;
|
|
|
|
}
|
|
|
|
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
|
|
{
|
|
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
|
|
try_merge_map(tree, em);
|
|
}
|
|
|
|
/**
|
|
* add_extent_mapping - add new extent map to the extent tree
|
|
* @tree: tree to insert new map in
|
|
* @em: map to insert
|
|
*
|
|
* Insert @em into @tree or perform a simple forward/backward merge with
|
|
* existing mappings. The extent_map struct passed in will be inserted
|
|
* into the tree directly, with an additional reference taken, or a
|
|
* reference dropped if the merge attempt was successful.
|
|
*/
|
|
int add_extent_mapping(struct extent_map_tree *tree,
|
|
struct extent_map *em)
|
|
{
|
|
int ret = 0;
|
|
struct rb_node *rb;
|
|
struct extent_map *exist;
|
|
|
|
exist = lookup_extent_mapping(tree, em->start, em->len);
|
|
if (exist) {
|
|
free_extent_map(exist);
|
|
ret = -EEXIST;
|
|
goto out;
|
|
}
|
|
rb = tree_insert(&tree->map, em->start, &em->rb_node);
|
|
if (rb) {
|
|
ret = -EEXIST;
|
|
goto out;
|
|
}
|
|
atomic_inc(&em->refs);
|
|
|
|
em->mod_start = em->start;
|
|
em->mod_len = em->len;
|
|
|
|
try_merge_map(tree, em);
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
/* simple helper to do math around the end of an extent, handling wrap */
|
|
static u64 range_end(u64 start, u64 len)
|
|
{
|
|
if (start + len < start)
|
|
return (u64)-1;
|
|
return start + len;
|
|
}
|
|
|
|
/*
 * Common implementation behind lookup_extent_mapping() and
 * search_extent_mapping().
 *
 * Searches @tree for the extent containing @start.  If there is none, the
 * neighbour reported by __tree_search() is used instead: its prev_ret
 * result is preferred and its next_ret result is the fallback.
 *
 * When @strict is non-zero the chosen extent is only returned if it really
 * intersects [@start, @start + @len); otherwise it is returned regardless.
 *
 * Returns the extent_map with an extra reference taken, or NULL if the
 * tree yielded no candidate or the strict check failed.
 */
struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
					   u64 start, u64 len, int strict)
{
	struct rb_node *fwd = NULL;
	struct rb_node *back = NULL;
	struct rb_node *found;
	struct extent_map *em;
	u64 end = range_end(start, len);

	found = __tree_search(&tree->map, start, &fwd, &back);
	if (!found)
		found = fwd ? fwd : back;
	if (!found)
		return NULL;

	em = rb_entry(found, struct extent_map, rb_node);

	if (strict) {
		bool overlaps = end > em->start && start < extent_map_end(em);

		if (!overlaps)
			return NULL;
	}

	atomic_inc(&em->refs);
	return em;
}
|
|
|
|
/**
|
|
* lookup_extent_mapping - lookup extent_map
|
|
* @tree: tree to lookup in
|
|
* @start: byte offset to start the search
|
|
* @len: length of the lookup range
|
|
*
|
|
* Find and return the first extent_map struct in @tree that intersects the
|
|
* [start, len] range. There may be additional objects in the tree that
|
|
* intersect, so check the object returned carefully to make sure that no
|
|
* additional lookups are needed.
|
|
*/
|
|
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
|
|
u64 start, u64 len)
|
|
{
|
|
return __lookup_extent_mapping(tree, start, len, 1);
|
|
}
|
|
|
|
/**
|
|
* search_extent_mapping - find a nearby extent map
|
|
* @tree: tree to lookup in
|
|
* @start: byte offset to start the search
|
|
* @len: length of the lookup range
|
|
*
|
|
* Find and return the first extent_map struct in @tree that intersects the
|
|
* [start, len] range.
|
|
*
|
|
* If one can't be found, any nearby extent may be returned
|
|
*/
|
|
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
|
|
u64 start, u64 len)
|
|
{
|
|
return __lookup_extent_mapping(tree, start, len, 0);
|
|
}
|
|
|
|
/**
|
|
* remove_extent_mapping - removes an extent_map from the extent tree
|
|
* @tree: extent tree to remove from
|
|
* @em: extent map beeing removed
|
|
*
|
|
* Removes @em from @tree. No reference counts are dropped, and no checks
|
|
* are done to see if the range is in use
|
|
*/
|
|
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
|
|
{
|
|
int ret = 0;
|
|
|
|
WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
|
|
rb_erase(&em->rb_node, &tree->map);
|
|
if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
|
|
list_del_init(&em->list);
|
|
em->in_tree = 0;
|
|
return ret;
|
|
}
|