mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 12:42:02 +00:00
ext4: Fix bigalloc quota accounting and i_blocks value
With bigalloc changes, the i_blocks value was not correctly set (it was still set to number of blocks being used, but in case of bigalloc, we want i_blocks to represent the number of clusters being used). Since the quota subsystem sets the i_blocks value, this patch fixes the quota accounting and makes sure that the i_blocks value is set correctly. Signed-off-by: Aditya Kali <adityakali@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
parent
27baebb849
commit
7b415bf60f
@ -485,7 +485,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
|
|||||||
* @handle: handle to this transaction
|
* @handle: handle to this transaction
|
||||||
* @inode: file inode
|
* @inode: file inode
|
||||||
* @goal: given target block(filesystem wide)
|
* @goal: given target block(filesystem wide)
|
||||||
* @count: pointer to total number of blocks needed
|
* @count: pointer to total number of clusters needed
|
||||||
* @errp: error code
|
* @errp: error code
|
||||||
*
|
*
|
||||||
* Return 1st allocated block number on success, *count stores total account
|
* Return 1st allocated block number on success, *count stores total account
|
||||||
@ -517,7 +517,8 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
|
|||||||
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||||||
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
|
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
|
||||||
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||||||
dquot_alloc_block_nofail(inode, ar.len);
|
dquot_alloc_block_nofail(inode,
|
||||||
|
EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -144,9 +144,17 @@ struct ext4_allocation_request {
|
|||||||
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
|
#define EXT4_MAP_UNWRITTEN (1 << BH_Unwritten)
|
||||||
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
|
#define EXT4_MAP_BOUNDARY (1 << BH_Boundary)
|
||||||
#define EXT4_MAP_UNINIT (1 << BH_Uninit)
|
#define EXT4_MAP_UNINIT (1 << BH_Uninit)
|
||||||
|
/* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
|
||||||
|
* ext4_map_blocks wants to know whether or not the underlying cluster has
|
||||||
|
* already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
|
||||||
|
* the requested mapping was from previously mapped (or delayed allocated)
|
||||||
|
* cluster. We use BH_AllocFromCluster only for this flag. BH_AllocFromCluster
|
||||||
|
* should never appear on buffer_head's state flags.
|
||||||
|
*/
|
||||||
|
#define EXT4_MAP_FROM_CLUSTER (1 << BH_AllocFromCluster)
|
||||||
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
|
#define EXT4_MAP_FLAGS (EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
|
||||||
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
|
EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
|
||||||
EXT4_MAP_UNINIT)
|
EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
|
||||||
|
|
||||||
struct ext4_map_blocks {
|
struct ext4_map_blocks {
|
||||||
ext4_fsblk_t m_pblk;
|
ext4_fsblk_t m_pblk;
|
||||||
@ -1884,6 +1892,7 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
|
|||||||
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
|
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
|
||||||
extern void ext4_da_update_reserve_space(struct inode *inode,
|
extern void ext4_da_update_reserve_space(struct inode *inode,
|
||||||
int used, int quota_claim);
|
int used, int quota_claim);
|
||||||
|
extern int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock);
|
||||||
|
|
||||||
/* indirect.c */
|
/* indirect.c */
|
||||||
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
|
||||||
@ -2284,6 +2293,11 @@ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
|
|||||||
enum ext4_state_bits {
|
enum ext4_state_bits {
|
||||||
BH_Uninit /* blocks are allocated but uninitialized on disk */
|
BH_Uninit /* blocks are allocated but uninitialized on disk */
|
||||||
= BH_JBDPrivateStart,
|
= BH_JBDPrivateStart,
|
||||||
|
BH_AllocFromCluster, /* allocated blocks were part of already
|
||||||
|
* allocated cluster. Note that this flag will
|
||||||
|
* never, ever appear in a buffer_head's state
|
||||||
|
* flag. See EXT4_MAP_FROM_CLUSTER to see where
|
||||||
|
* this is used. */
|
||||||
};
|
};
|
||||||
|
|
||||||
BUFFER_FNS(Uninit, uninit)
|
BUFFER_FNS(Uninit, uninit)
|
||||||
|
@ -290,5 +290,7 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
|
|||||||
struct ext4_ext_path *);
|
struct ext4_ext_path *);
|
||||||
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
|
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
|
||||||
extern int ext4_ext_check_inode(struct inode *inode);
|
extern int ext4_ext_check_inode(struct inode *inode);
|
||||||
|
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
|
||||||
|
int search_hint_reverse);
|
||||||
#endif /* _EXT4_EXTENTS */
|
#endif /* _EXT4_EXTENTS */
|
||||||
|
|
||||||
|
@ -2686,6 +2686,21 @@ again:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* If we still have something in the partial cluster and we have removed
|
||||||
|
* even the first extent, then we should free the blocks in the partial
|
||||||
|
* cluster as well. */
|
||||||
|
if (partial_cluster && path->p_hdr->eh_entries == 0) {
|
||||||
|
int flags = EXT4_FREE_BLOCKS_FORGET;
|
||||||
|
|
||||||
|
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
|
||||||
|
flags |= EXT4_FREE_BLOCKS_METADATA;
|
||||||
|
|
||||||
|
ext4_free_blocks(handle, inode, NULL,
|
||||||
|
EXT4_C2B(EXT4_SB(sb), partial_cluster),
|
||||||
|
EXT4_SB(sb)->s_cluster_ratio, flags);
|
||||||
|
partial_cluster = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* TODO: flexible tree reduction should be here */
|
/* TODO: flexible tree reduction should be here */
|
||||||
if (path->p_hdr->eh_entries == 0) {
|
if (path->p_hdr->eh_entries == 0) {
|
||||||
/*
|
/*
|
||||||
@ -3233,6 +3248,195 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
|
|||||||
return ext4_mark_inode_dirty(handle, inode);
|
return ext4_mark_inode_dirty(handle, inode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ext4_find_delalloc_range: find delayed allocated block in the given range.
|
||||||
|
*
|
||||||
|
* Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
|
||||||
|
* whether there are any buffers marked for delayed allocation. It returns '1'
|
||||||
|
* on the first delalloc'ed buffer head found. If no buffer head in the given
|
||||||
|
* range is marked for delalloc, it returns 0.
|
||||||
|
* lblk_start should always be <= lblk_end.
|
||||||
|
* search_hint_reverse is to indicate that searching in reverse from lblk_end to
|
||||||
|
* lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
|
||||||
|
* block sooner). This is useful when blocks are truncated sequentially from
|
||||||
|
* lblk_start towards lblk_end.
|
||||||
|
*/
|
||||||
|
static int ext4_find_delalloc_range(struct inode *inode,
|
||||||
|
ext4_lblk_t lblk_start,
|
||||||
|
ext4_lblk_t lblk_end,
|
||||||
|
int search_hint_reverse)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = inode->i_mapping;
|
||||||
|
struct buffer_head *head, *bh = NULL;
|
||||||
|
struct page *page;
|
||||||
|
ext4_lblk_t i, pg_lblk;
|
||||||
|
pgoff_t index;
|
||||||
|
|
||||||
|
/* reverse search wont work if fs block size is less than page size */
|
||||||
|
if (inode->i_blkbits < PAGE_CACHE_SHIFT)
|
||||||
|
search_hint_reverse = 0;
|
||||||
|
|
||||||
|
if (search_hint_reverse)
|
||||||
|
i = lblk_end;
|
||||||
|
else
|
||||||
|
i = lblk_start;
|
||||||
|
|
||||||
|
index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
|
||||||
|
while ((i >= lblk_start) && (i <= lblk_end)) {
|
||||||
|
page = find_get_page(mapping, index);
|
||||||
|
if (!page || !PageDirty(page))
|
||||||
|
goto nextpage;
|
||||||
|
|
||||||
|
if (PageWriteback(page)) {
|
||||||
|
/*
|
||||||
|
* This might be a race with allocation and writeout. In
|
||||||
|
* this case we just assume that the rest of the range
|
||||||
|
* will eventually be written and there wont be any
|
||||||
|
* delalloc blocks left.
|
||||||
|
* TODO: the above assumption is troublesome, but might
|
||||||
|
* work better in practice. other option could be note
|
||||||
|
* somewhere that the cluster is getting written out and
|
||||||
|
* detect that here.
|
||||||
|
*/
|
||||||
|
page_cache_release(page);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!page_has_buffers(page))
|
||||||
|
goto nextpage;
|
||||||
|
|
||||||
|
head = page_buffers(page);
|
||||||
|
if (!head)
|
||||||
|
goto nextpage;
|
||||||
|
|
||||||
|
bh = head;
|
||||||
|
pg_lblk = index << (PAGE_CACHE_SHIFT -
|
||||||
|
inode->i_blkbits);
|
||||||
|
do {
|
||||||
|
if (unlikely(pg_lblk < lblk_start)) {
|
||||||
|
/*
|
||||||
|
* This is possible when fs block size is less
|
||||||
|
* than page size and our cluster starts/ends in
|
||||||
|
* middle of the page. So we need to skip the
|
||||||
|
* initial few blocks till we reach the 'lblk'
|
||||||
|
*/
|
||||||
|
pg_lblk++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (buffer_delay(bh)) {
|
||||||
|
page_cache_release(page);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (search_hint_reverse)
|
||||||
|
i--;
|
||||||
|
else
|
||||||
|
i++;
|
||||||
|
} while ((i >= lblk_start) && (i <= lblk_end) &&
|
||||||
|
((bh = bh->b_this_page) != head));
|
||||||
|
nextpage:
|
||||||
|
if (page)
|
||||||
|
page_cache_release(page);
|
||||||
|
/*
|
||||||
|
* Move to next page. 'i' will be the first lblk in the next
|
||||||
|
* page.
|
||||||
|
*/
|
||||||
|
if (search_hint_reverse)
|
||||||
|
index--;
|
||||||
|
else
|
||||||
|
index++;
|
||||||
|
i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether any block of the cluster containing @lblk is marked for
 * delayed allocation.  The cluster bounds are derived from
 * sbi->s_cluster_ratio and the actual search is delegated to
 * ext4_find_delalloc_range(); @search_hint_reverse is passed through
 * unchanged.  Returns whatever ext4_find_delalloc_range() returns.
 */
int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
			       int search_hint_reverse)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	ext4_lblk_t cluster_first, cluster_last;

	/* Round down to the first block of the cluster ... */
	cluster_first = lblk & (~(sbi->s_cluster_ratio - 1));
	/* ... and the last block is one full cluster further, inclusive. */
	cluster_last = cluster_first + sbi->s_cluster_ratio - 1;

	return ext4_find_delalloc_range(inode, cluster_first, cluster_last,
					search_hint_reverse);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines how many complete clusters (out of those specified by the 'map')
|
||||||
|
* are under delalloc and were reserved quota for.
|
||||||
|
* This function is called when we are writing out the blocks that were
|
||||||
|
* originally written with their allocation delayed, but then the space was
|
||||||
|
* allocated using fallocate() before the delayed allocation could be resolved.
|
||||||
|
* The cases to look for are:
|
||||||
|
* ('=' indicated delayed allocated blocks
|
||||||
|
* '-' indicates non-delayed allocated blocks)
|
||||||
|
* (a) partial clusters towards beginning and/or end outside of allocated range
|
||||||
|
* are not delalloc'ed.
|
||||||
|
* Ex:
|
||||||
|
* |----c---=|====c====|====c====|===-c----|
|
||||||
|
* |++++++ allocated ++++++|
|
||||||
|
* ==> 4 complete clusters in above example
|
||||||
|
*
|
||||||
|
* (b) partial cluster (outside of allocated range) towards either end is
|
||||||
|
* marked for delayed allocation. In this case, we will exclude that
|
||||||
|
* cluster.
|
||||||
|
* Ex:
|
||||||
|
* |----====c========|========c========|
|
||||||
|
* |++++++ allocated ++++++|
|
||||||
|
* ==> 1 complete clusters in above example
|
||||||
|
*
|
||||||
|
* Ex:
|
||||||
|
* |================c================|
|
||||||
|
* |++++++ allocated ++++++|
|
||||||
|
* ==> 0 complete clusters in above example
|
||||||
|
*
|
||||||
|
* The ext4_da_update_reserve_space will be called only if we
|
||||||
|
* determine here that there were some "entire" clusters that span
|
||||||
|
* this 'allocated' range.
|
||||||
|
* In the non-bigalloc case, this function will just end up returning num_blks
|
||||||
|
* without ever calling ext4_find_delalloc_range.
|
||||||
|
*/
|
||||||
|
static unsigned int
|
||||||
|
get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
|
||||||
|
unsigned int num_blks)
|
||||||
|
{
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
ext4_lblk_t alloc_cluster_start, alloc_cluster_end;
|
||||||
|
ext4_lblk_t lblk_from, lblk_to, c_offset;
|
||||||
|
unsigned int allocated_clusters = 0;
|
||||||
|
|
||||||
|
alloc_cluster_start = EXT4_B2C(sbi, lblk_start);
|
||||||
|
alloc_cluster_end = EXT4_B2C(sbi, lblk_start + num_blks - 1);
|
||||||
|
|
||||||
|
/* max possible clusters for this allocation */
|
||||||
|
allocated_clusters = alloc_cluster_end - alloc_cluster_start + 1;
|
||||||
|
|
||||||
|
/* Check towards left side */
|
||||||
|
c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
|
||||||
|
if (c_offset) {
|
||||||
|
lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
|
||||||
|
lblk_to = lblk_from + c_offset - 1;
|
||||||
|
|
||||||
|
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
|
||||||
|
allocated_clusters--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now check towards right. */
|
||||||
|
c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
|
||||||
|
if (allocated_clusters && c_offset) {
|
||||||
|
lblk_from = lblk_start + num_blks;
|
||||||
|
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
|
||||||
|
|
||||||
|
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
|
||||||
|
allocated_clusters--;
|
||||||
|
}
|
||||||
|
|
||||||
|
return allocated_clusters;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
||||||
struct ext4_map_blocks *map,
|
struct ext4_map_blocks *map,
|
||||||
@ -3338,8 +3542,15 @@ out:
|
|||||||
* But fallocate would have already updated quota and block
|
* But fallocate would have already updated quota and block
|
||||||
* count for this offset. So cancel these reservation
|
* count for this offset. So cancel these reservation
|
||||||
*/
|
*/
|
||||||
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
|
||||||
ext4_da_update_reserve_space(inode, allocated, 0);
|
unsigned int reserved_clusters;
|
||||||
|
reserved_clusters = get_reserved_cluster_alloc(inode,
|
||||||
|
map->m_lblk, map->m_len);
|
||||||
|
if (reserved_clusters)
|
||||||
|
ext4_da_update_reserve_space(inode,
|
||||||
|
reserved_clusters,
|
||||||
|
0);
|
||||||
|
}
|
||||||
|
|
||||||
map_out:
|
map_out:
|
||||||
map->m_flags |= EXT4_MAP_MAPPED;
|
map->m_flags |= EXT4_MAP_MAPPED;
|
||||||
@ -3484,6 +3695,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
ext4_fsblk_t newblock = 0;
|
ext4_fsblk_t newblock = 0;
|
||||||
int free_on_err = 0, err = 0, depth, ret;
|
int free_on_err = 0, err = 0, depth, ret;
|
||||||
unsigned int allocated = 0, offset = 0;
|
unsigned int allocated = 0, offset = 0;
|
||||||
|
unsigned int allocated_clusters = 0, reserved_clusters = 0;
|
||||||
unsigned int punched_out = 0;
|
unsigned int punched_out = 0;
|
||||||
unsigned int result = 0;
|
unsigned int result = 0;
|
||||||
struct ext4_allocation_request ar;
|
struct ext4_allocation_request ar;
|
||||||
@ -3499,6 +3711,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
|
if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
|
||||||
ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
|
ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
|
||||||
if (!newex.ee_start_lo && !newex.ee_start_hi) {
|
if (!newex.ee_start_lo && !newex.ee_start_hi) {
|
||||||
|
if ((sbi->s_cluster_ratio > 1) &&
|
||||||
|
ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
|
||||||
|
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||||
|
|
||||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
|
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
|
||||||
/*
|
/*
|
||||||
* block isn't allocated yet and
|
* block isn't allocated yet and
|
||||||
@ -3509,6 +3725,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
/* we should allocate requested block */
|
/* we should allocate requested block */
|
||||||
} else {
|
} else {
|
||||||
/* block is already allocated */
|
/* block is already allocated */
|
||||||
|
if (sbi->s_cluster_ratio > 1)
|
||||||
|
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||||
newblock = map->m_lblk
|
newblock = map->m_lblk
|
||||||
- le32_to_cpu(newex.ee_block)
|
- le32_to_cpu(newex.ee_block)
|
||||||
+ ext4_ext_pblock(&newex);
|
+ ext4_ext_pblock(&newex);
|
||||||
@ -3665,6 +3883,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((sbi->s_cluster_ratio > 1) &&
|
||||||
|
ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
|
||||||
|
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* requested block isn't allocated yet;
|
* requested block isn't allocated yet;
|
||||||
* we couldn't try to create block if create flag is zero
|
* we couldn't try to create block if create flag is zero
|
||||||
@ -3681,6 +3903,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
/*
|
/*
|
||||||
* Okay, we need to do block allocation.
|
* Okay, we need to do block allocation.
|
||||||
*/
|
*/
|
||||||
|
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
|
||||||
newex.ee_block = cpu_to_le32(map->m_lblk);
|
newex.ee_block = cpu_to_le32(map->m_lblk);
|
||||||
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
|
cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
|
||||||
|
|
||||||
@ -3692,6 +3915,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
get_implied_cluster_alloc(sbi, map, ex, path)) {
|
get_implied_cluster_alloc(sbi, map, ex, path)) {
|
||||||
ar.len = allocated = map->m_len;
|
ar.len = allocated = map->m_len;
|
||||||
newblock = map->m_pblk;
|
newblock = map->m_pblk;
|
||||||
|
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||||
goto got_allocated_blocks;
|
goto got_allocated_blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3712,6 +3936,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
get_implied_cluster_alloc(sbi, map, ex2, path)) {
|
get_implied_cluster_alloc(sbi, map, ex2, path)) {
|
||||||
ar.len = allocated = map->m_len;
|
ar.len = allocated = map->m_len;
|
||||||
newblock = map->m_pblk;
|
newblock = map->m_pblk;
|
||||||
|
map->m_flags |= EXT4_MAP_FROM_CLUSTER;
|
||||||
goto got_allocated_blocks;
|
goto got_allocated_blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3765,6 +3990,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
|
|||||||
ext_debug("allocate new block: goal %llu, found %llu/%u\n",
|
ext_debug("allocate new block: goal %llu, found %llu/%u\n",
|
||||||
ar.goal, newblock, allocated);
|
ar.goal, newblock, allocated);
|
||||||
free_on_err = 1;
|
free_on_err = 1;
|
||||||
|
allocated_clusters = ar.len;
|
||||||
ar.len = EXT4_C2B(sbi, ar.len) - offset;
|
ar.len = EXT4_C2B(sbi, ar.len) - offset;
|
||||||
if (ar.len > allocated)
|
if (ar.len > allocated)
|
||||||
ar.len = allocated;
|
ar.len = allocated;
|
||||||
@ -3822,8 +4048,80 @@ got_allocated_blocks:
|
|||||||
* Update reserved blocks/metadata blocks after successful
|
* Update reserved blocks/metadata blocks after successful
|
||||||
* block allocation which had been deferred till now.
|
* block allocation which had been deferred till now.
|
||||||
*/
|
*/
|
||||||
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
|
||||||
ext4_da_update_reserve_space(inode, allocated, 1);
|
/*
|
||||||
|
* Check how many clusters we had reserved for this allocated range.
|
||||||
|
*/
|
||||||
|
reserved_clusters = get_reserved_cluster_alloc(inode,
|
||||||
|
map->m_lblk, allocated);
|
||||||
|
if (map->m_flags & EXT4_MAP_FROM_CLUSTER) {
|
||||||
|
if (reserved_clusters) {
|
||||||
|
/*
|
||||||
|
* We have clusters reserved for this range.
|
||||||
|
* But since we are not doing actual allocation
|
||||||
|
* and are simply using blocks from previously
|
||||||
|
* allocated cluster, we should release the
|
||||||
|
* reservation and not claim quota.
|
||||||
|
*/
|
||||||
|
ext4_da_update_reserve_space(inode,
|
||||||
|
reserved_clusters, 0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
BUG_ON(allocated_clusters < reserved_clusters);
|
||||||
|
/* We will claim quota for all newly allocated blocks.*/
|
||||||
|
ext4_da_update_reserve_space(inode, allocated_clusters,
|
||||||
|
1);
|
||||||
|
if (reserved_clusters < allocated_clusters) {
|
||||||
|
int reservation = allocated_clusters -
|
||||||
|
reserved_clusters;
|
||||||
|
/*
|
||||||
|
* It seems we claimed few clusters outside of
|
||||||
|
* the range of this allocation. We should give
|
||||||
|
* it back to the reservation pool. This can
|
||||||
|
* happen in the following case:
|
||||||
|
*
|
||||||
|
* * Suppose s_cluster_ratio is 4 (i.e., each
|
||||||
|
* cluster has 4 blocks. Thus, the clusters
|
||||||
|
* are [0-3],[4-7],[8-11]...
|
||||||
|
* * First comes delayed allocation write for
|
||||||
|
* logical blocks 10 & 11. Since there were no
|
||||||
|
* previous delayed allocated blocks in the
|
||||||
|
* range [8-11], we would reserve 1 cluster
|
||||||
|
* for this write.
|
||||||
|
* * Next comes write for logical blocks 3 to 8.
|
||||||
|
* In this case, we will reserve 2 clusters
|
||||||
|
* (for [0-3] and [4-7]; and not for [8-11] as
|
||||||
|
* that range already has delayed allocated blocks).
|
||||||
|
* Thus total reserved clusters now becomes 3.
|
||||||
|
* * Now, during the delayed allocation writeout
|
||||||
|
* time, we will first write blocks [3-8] and
|
||||||
|
* allocate 3 clusters for writing these
|
||||||
|
* blocks. Also, we would claim all these
|
||||||
|
* three clusters above.
|
||||||
|
* * Now when we come here to writeout the
|
||||||
|
* blocks [10-11], we would expect to claim
|
||||||
|
* the reservation of 1 cluster we had made
|
||||||
|
* (and we would claim it since there are no
|
||||||
|
* more delayed allocated blocks in the range
|
||||||
|
* [8-11]. But our reserved cluster count had
|
||||||
|
* already gone to 0.
|
||||||
|
*
|
||||||
|
* Thus, at the step 4 above when we determine
|
||||||
|
* that there are still some unwritten delayed
|
||||||
|
* allocated blocks outside of our current
|
||||||
|
* block range, we should increment the
|
||||||
|
* reserved clusters count so that when the
|
||||||
|
* remaining blocks finally gets written, we
|
||||||
|
* could claim them.
|
||||||
|
*/
|
||||||
|
while (reservation) {
|
||||||
|
ext4_da_reserve_space(inode,
|
||||||
|
map->m_lblk);
|
||||||
|
reservation--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Cache the extent and update transaction to commit on fdatasync only
|
* Cache the extent and update transaction to commit on fdatasync only
|
||||||
|
@ -300,14 +300,14 @@ void ext4_da_update_reserve_space(struct inode *inode,
|
|||||||
|
|
||||||
/* Update quota subsystem for data blocks */
|
/* Update quota subsystem for data blocks */
|
||||||
if (quota_claim)
|
if (quota_claim)
|
||||||
dquot_claim_block(inode, used);
|
dquot_claim_block(inode, EXT4_C2B(sbi, used));
|
||||||
else {
|
else {
|
||||||
/*
|
/*
|
||||||
* We did fallocate with an offset that is already delayed
|
* We did fallocate with an offset that is already delayed
|
||||||
* allocated. So on delayed allocated writeback we should
|
* allocated. So on delayed allocated writeback we should
|
||||||
* not re-claim the quota for fallocated blocks.
|
* not re-claim the quota for fallocated blocks.
|
||||||
*/
|
*/
|
||||||
dquot_release_reservation_block(inode, used);
|
dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1037,14 +1037,14 @@ static int ext4_journalled_write_end(struct file *file,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reserve a single block located at lblock
|
* Reserve a single cluster located at lblock
|
||||||
*/
|
*/
|
||||||
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
|
int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
|
||||||
{
|
{
|
||||||
int retries = 0;
|
int retries = 0;
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||||
unsigned long md_needed;
|
unsigned int md_needed;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1054,7 +1054,8 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
|
|||||||
*/
|
*/
|
||||||
repeat:
|
repeat:
|
||||||
spin_lock(&ei->i_block_reservation_lock);
|
spin_lock(&ei->i_block_reservation_lock);
|
||||||
md_needed = ext4_calc_metadata_amount(inode, lblock);
|
md_needed = EXT4_NUM_B2C(sbi,
|
||||||
|
ext4_calc_metadata_amount(inode, lblock));
|
||||||
trace_ext4_da_reserve_space(inode, md_needed);
|
trace_ext4_da_reserve_space(inode, md_needed);
|
||||||
spin_unlock(&ei->i_block_reservation_lock);
|
spin_unlock(&ei->i_block_reservation_lock);
|
||||||
|
|
||||||
@ -1063,7 +1064,7 @@ repeat:
|
|||||||
* us from metadata over-estimation, though we may go over by
|
* us from metadata over-estimation, though we may go over by
|
||||||
* a small amount in the end. Here we just reserve for data.
|
* a small amount in the end. Here we just reserve for data.
|
||||||
*/
|
*/
|
||||||
ret = dquot_reserve_block(inode, 1);
|
ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
/*
|
/*
|
||||||
@ -1071,7 +1072,7 @@ repeat:
|
|||||||
* we cannot afford to run out of free blocks.
|
* we cannot afford to run out of free blocks.
|
||||||
*/
|
*/
|
||||||
if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
|
if (ext4_claim_free_blocks(sbi, md_needed + 1, 0)) {
|
||||||
dquot_release_reservation_block(inode, 1);
|
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
|
||||||
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
|
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
|
||||||
yield();
|
yield();
|
||||||
goto repeat;
|
goto repeat;
|
||||||
@ -1118,6 +1119,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
|
|||||||
* We can release all of the reserved metadata blocks
|
* We can release all of the reserved metadata blocks
|
||||||
* only when we have written all of the delayed
|
* only when we have written all of the delayed
|
||||||
* allocation blocks.
|
* allocation blocks.
|
||||||
|
* Note that in case of bigalloc, i_reserved_meta_blocks,
|
||||||
|
* i_reserved_data_blocks, etc. refer to number of clusters.
|
||||||
*/
|
*/
|
||||||
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
|
percpu_counter_sub(&sbi->s_dirtyclusters_counter,
|
||||||
ei->i_reserved_meta_blocks);
|
ei->i_reserved_meta_blocks);
|
||||||
@ -1130,7 +1133,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
|
|||||||
|
|
||||||
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||||||
|
|
||||||
dquot_release_reservation_block(inode, to_free);
|
dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ext4_da_page_release_reservation(struct page *page,
|
static void ext4_da_page_release_reservation(struct page *page,
|
||||||
@ -1139,6 +1142,9 @@ static void ext4_da_page_release_reservation(struct page *page,
|
|||||||
int to_release = 0;
|
int to_release = 0;
|
||||||
struct buffer_head *head, *bh;
|
struct buffer_head *head, *bh;
|
||||||
unsigned int curr_off = 0;
|
unsigned int curr_off = 0;
|
||||||
|
struct inode *inode = page->mapping->host;
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
int num_clusters;
|
||||||
|
|
||||||
head = page_buffers(page);
|
head = page_buffers(page);
|
||||||
bh = head;
|
bh = head;
|
||||||
@ -1151,7 +1157,20 @@ static void ext4_da_page_release_reservation(struct page *page,
|
|||||||
}
|
}
|
||||||
curr_off = next_off;
|
curr_off = next_off;
|
||||||
} while ((bh = bh->b_this_page) != head);
|
} while ((bh = bh->b_this_page) != head);
|
||||||
ext4_da_release_space(page->mapping->host, to_release);
|
|
||||||
|
/* If we have released all the blocks belonging to a cluster, then we
|
||||||
|
* need to release the reserved space for that cluster. */
|
||||||
|
num_clusters = EXT4_NUM_B2C(sbi, to_release);
|
||||||
|
while (num_clusters > 0) {
|
||||||
|
ext4_fsblk_t lblk;
|
||||||
|
lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
|
||||||
|
((num_clusters - 1) << sbi->s_cluster_bits);
|
||||||
|
if (sbi->s_cluster_ratio == 1 ||
|
||||||
|
!ext4_find_delalloc_cluster(inode, lblk, 1))
|
||||||
|
ext4_da_release_space(inode, 1);
|
||||||
|
|
||||||
|
num_clusters--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1352,7 +1371,8 @@ static void ext4_print_free_blocks(struct inode *inode)
|
|||||||
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
|
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
|
||||||
percpu_counter_sum(&sbi->s_freeclusters_counter)));
|
percpu_counter_sum(&sbi->s_freeclusters_counter)));
|
||||||
printk(KERN_CRIT "dirty_blocks=%lld\n",
|
printk(KERN_CRIT "dirty_blocks=%lld\n",
|
||||||
(long long) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
|
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
|
||||||
|
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
|
||||||
printk(KERN_CRIT "Block reservation details\n");
|
printk(KERN_CRIT "Block reservation details\n");
|
||||||
printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
|
printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
|
||||||
EXT4_I(inode)->i_reserved_data_blocks);
|
EXT4_I(inode)->i_reserved_data_blocks);
|
||||||
@ -1626,10 +1646,14 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
|||||||
/*
|
/*
|
||||||
* XXX: __block_write_begin() unmaps passed block, is it OK?
|
* XXX: __block_write_begin() unmaps passed block, is it OK?
|
||||||
*/
|
*/
|
||||||
ret = ext4_da_reserve_space(inode, iblock);
|
/* If the block was allocated from previously allocated cluster,
|
||||||
if (ret)
|
* then we dont need to reserve it again. */
|
||||||
/* not enough space to reserve */
|
if (!(map.m_flags & EXT4_MAP_FROM_CLUSTER)) {
|
||||||
return ret;
|
ret = ext4_da_reserve_space(inode, iblock);
|
||||||
|
if (ret)
|
||||||
|
/* not enough space to reserve */
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
map_bh(bh, inode->i_sb, invalid_block);
|
map_bh(bh, inode->i_sb, invalid_block);
|
||||||
set_buffer_new(bh);
|
set_buffer_new(bh);
|
||||||
|
@ -4718,6 +4718,9 @@ do_more:
|
|||||||
|
|
||||||
freed += count;
|
freed += count;
|
||||||
|
|
||||||
|
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
|
||||||
|
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
|
||||||
|
|
||||||
/* We dirtied the bitmap block */
|
/* We dirtied the bitmap block */
|
||||||
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
|
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
|
||||||
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
|
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
|
||||||
@ -4736,8 +4739,6 @@ do_more:
|
|||||||
}
|
}
|
||||||
ext4_mark_super_dirty(sb);
|
ext4_mark_super_dirty(sb);
|
||||||
error_return:
|
error_return:
|
||||||
if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
|
|
||||||
dquot_free_block(inode, freed);
|
|
||||||
brelse(bitmap_bh);
|
brelse(bitmap_bh);
|
||||||
ext4_std_error(sb, err);
|
ext4_std_error(sb, err);
|
||||||
return;
|
return;
|
||||||
|
@ -2473,7 +2473,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,
|
|||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||||
(s64) percpu_counter_sum(&sbi->s_dirtyclusters_counter));
|
(s64) EXT4_C2B(sbi,
|
||||||
|
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t session_write_kbytes_show(struct ext4_attr *a,
|
static ssize_t session_write_kbytes_show(struct ext4_attr *a,
|
||||||
|
Loading…
Reference in New Issue
Block a user