Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "The big new feature added this time is supporting online resizing
  using the meta_bg feature.  This allows us to resize file systems
  which are greater than 16TB.  In addition, the speed of online
  resizing has been improved in general.

  We also fix a number of races, some of which could lead to deadlocks,
  in ext4's Asynchronous I/O and online defrag support, thanks to good
  work by Dmitry Monakhov.

  There are also a large number of more minor bug fixes and cleanups
  from a number of other ext4 contributors, quite a few of whom have
  submitted fixes for the first time."
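
For orientation, a minimal userspace sketch (not part of this merge) of how an online resize is requested once the kernel supports it: a tool such as resize2fs opens the mounted filesystem and issues the EXT4_IOC_RESIZE_FS ioctl with the new block count. The mount point and target size below are placeholders.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>

/* Mirrors the kernel's definition in fs/ext4/ext4.h. */
#define EXT4_IOC_RESIZE_FS	_IOW('f', 16, __u64)

int main(void)
{
    __u64 new_blocks = 6ULL << 30;          /* ~24 TiB of 4k blocks (example) */
    int fd = open("/mnt/bigfs", O_RDONLY);  /* placeholder mount point */

    if (fd < 0 || ioctl(fd, EXT4_IOC_RESIZE_FS, &new_blocks) < 0) {
        perror("online resize");
        return 1;
    }
    close(fd);
    return 0;
}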

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits)
  ext4: fix ext4_flush_completed_IO wait semantics
  ext4: fix mtime update in nodelalloc mode
  ext4: fix ext_remove_space for punch_hole case
  ext4: punch_hole should wait for DIO writers
  ext4: serialize truncate with overwrite DIO workers
  ext4: endless truncate due to nonlocked dio readers
  ext4: serialize unlocked dio reads with truncate
  ext4: serialize dio nonlocked reads with defrag workers
  ext4: completed_io locking cleanup
  ext4: fix unwritten counter leakage
  ext4: give i_aiodio_unwritten a more appropriate name
  ext4: ext4_inode_info diet
  ext4: convert to use leXX_add_cpu()
  ext4: ext4_bread usage audit
  fs: reserve fallocate flag codepoint
  ext4: remove redundant offset check in mext_check_arguments()
  ext4: don't clear orphan list on ro mount with errors
  jbd2: fix assertion failure in commit code due to lacking transaction credits
  ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails
  ext4: enable FITRIM ioctl on bigalloc file system
  ...
Linus Torvalds, 2012-10-08 06:36:39 +09:00, commit 6432f21284
26 changed files with 1501 additions and 899 deletions


@ -96,3 +96,16 @@ Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
The maximum number of megabytes the writeback code will
try to write out before moving on to another inode.
What: /sys/fs/ext4/<disk>/extent_max_zeroout_kb
Date: August 2012
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
The maximum number of kilobytes which will be zeroed
out in preference to creating a new uninitialized
extent when manipulating an inode's extent tree. Note
that using a larger value will increase the
variability of time necessary to complete a random
write operation (since a 4k random write might turn
into a much larger write due to the zeroout
operation).
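
A minimal sketch of setting this tunable from userspace (the device name "sda1" and the 64 KiB value are just examples); it is equivalent to the shell one-liner: echo 64 > /sys/fs/ext4/sda1/extent_max_zeroout_kb

#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/sys/fs/ext4/sda1/extent_max_zeroout_kb", "w");

    if (!f) {
        perror("extent_max_zeroout_kb");
        return 1;
    }
    /* Allow up to 64 KiB to be zeroed out instead of splitting an extent. */
    fprintf(f, "64\n");
    fclose(f);
    return 0;
}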


@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified
Because of the restrictions this option imposes,
it is off by default (i.e. dioread_lock).
max_dir_size_kb=n This limits the size of directories so that any
attempt to expand them beyond the specified
limit in kilobytes will cause an ENOSPC error.
This is useful in memory constrained
environments, where a very large directory can
cause severe performance problems or even
provoke the Out Of Memory killer. (For example,
if there is only 512mb memory available, a 176mb
directory may seriously cramp the system's style.)
i_version Enable 64-bit inode version support. This option is
off by default.
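
A minimal sketch of passing the new max_dir_size_kb option at mount time via mount(2); the device, mount point, and 512 KiB limit are placeholders, equivalent to: mount -t ext4 -o max_dir_size_kb=512 /dev/sdb1 /mnt

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
    /* Reject directory growth beyond 512 KiB on this mount. */
    if (mount("/dev/sdb1", "/mnt", "ext4", 0, "max_dir_size_kb=512") < 0) {
        perror("mount");
        return 1;
    }
    return 0;
}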


@ -2312,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
loff_t size;
int ret;
/*
* Update file times before taking page lock. We may end up failing the
* fault so this update may be superfluous but who really cares...
*/
file_update_time(vma->vm_file);
lock_page(page);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
@ -2355,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
sb_start_pagefault(sb);
/*
* Update file times before taking page lock. We may end up failing the
* fault so this update may be superfluous but who really cares...
*/
file_update_time(vma->vm_file);
ret = __block_page_mkwrite(vma, vmf, get_block);
sb_end_pagefault(sb);
return block_page_mkwrite_return(ret);


@ -186,7 +186,6 @@ struct mpage_da_data {
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_QUEUED 0x0004
#define EXT4_IO_END_DIRECT 0x0008
#define EXT4_IO_END_IN_FSYNC 0x0010
struct ext4_io_page {
struct page *p_page;
@ -912,9 +911,7 @@ struct ext4_inode_info {
struct list_head i_completed_io_list;
spinlock_t i_completed_io_lock;
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@ -1233,6 +1230,7 @@ struct ext4_sb_info {
spinlock_t s_md_lock;
unsigned short *s_mb_offsets;
unsigned int *s_mb_maxs;
unsigned int s_group_info_size;
/* tunables */
unsigned long s_stripe;
@ -1243,6 +1241,7 @@ struct ext4_sb_info {
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_max_writeback_mb_bump;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
@ -1270,8 +1269,12 @@ struct ext4_sb_info {
unsigned long s_sectors_written_start;
u64 s_kbytes_written;
/* the size of zero-out chunk */
unsigned int s_extent_max_zeroout_kb;
unsigned int s_log_groups_per_flex;
struct flex_groups *s_flex_groups;
ext4_group_t s_flex_groups_allocated;
/* workqueue for dio unwritten */
struct workqueue_struct *dio_unwritten_wq;
@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
{
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
atomic_inc(&EXT4_I(inode)->i_unwritten);
}
}
static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
{
return inode->i_private;
}
static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
{
inode->i_private = io;
}
/*
* Inode dynamic state flags
*/
@ -1345,6 +1358,8 @@ enum {
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
EXT4_STATE_NEWENTRY, /* File just added to dir */
EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
nolocking */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
/* fsync.c */
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
extern int ext4_flush_completed_IO(struct inode *);
extern int ext4_flush_unwritten_io(struct inode *);
/* hash.c */
extern int ext4fs_dirhash(const char *name, int len, struct
@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
ext4_group_t ngroups);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb,
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
extern void ext4_kvfree(void *ptr);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
extern __printf(4, 5)
void __ext4_error(struct super_block *, const char *, unsigned int,
const char *, ...);
@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
extern const struct inode_operations ext4_file_inode_operations;
extern const struct file_operations ext4_file_operations;
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
extern void ext4_unwritten_wait(struct inode *inode);
/* namei.c */
extern const struct inode_operations ext4_dir_inode_operations;
@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
/* page-io.c */
extern int __init ext4_init_pageio(void);
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
extern void ext4_exit_pageio(void);
extern void ext4_ioend_wait(struct inode *);
extern void ext4_free_io_end(ext4_io_end_t *io);
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
extern int ext4_end_io_nolock(ext4_io_end_t *io);
extern void ext4_io_submit(struct ext4_io_submit *io);
extern int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
}
/*
* Disable DIO read nolock optimization, so new dioreaders will be forced
* to grab i_mutex
*/
static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
{
ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
smp_mb();
}
static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
{
smp_mb();
ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
}
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
/* For ioend & aio unwritten conversion wait queues */


@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
le16_add_cpu(&neh->eh_depth, 1);
ext4_mark_inode_dirty(handle, inode);
out:
brelse(bh);
@ -1655,17 +1655,61 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
return merge_done;
}
/*
* This function does a very simple check to see if we can collapse
* an extent tree with a single extent tree leaf block into the inode.
*/
static void ext4_ext_try_to_merge_up(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path)
{
size_t s;
unsigned max_root = ext4_ext_space_root(inode, 0);
ext4_fsblk_t blk;
if ((path[0].p_depth != 1) ||
(le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
(le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
return;
/*
* We need to modify the block allocation bitmap and the block
* group descriptor to release the extent tree block. If we
* can't get the journal credits, give up.
*/
if (ext4_journal_extend(handle, 2))
return;
/*
* Copy the extent data up to the inode
*/
blk = ext4_idx_pblock(path[0].p_idx);
s = le16_to_cpu(path[1].p_hdr->eh_entries) *
sizeof(struct ext4_extent_idx);
s += sizeof(struct ext4_extent_header);
memcpy(path[0].p_hdr, path[1].p_hdr, s);
path[0].p_depth = 0;
path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
path[0].p_hdr->eh_max = cpu_to_le16(max_root);
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
}
/*
* This function tries to merge the @ex extent to neighbours in the tree.
* return 1 if merge left else 0.
*/
static int ext4_ext_try_to_merge(struct inode *inode,
static void ext4_ext_try_to_merge(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex) {
struct ext4_extent_header *eh;
unsigned int depth;
int merge_done = 0;
int ret = 0;
depth = ext_depth(inode);
BUG_ON(path[depth].p_hdr == NULL);
@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
if (!merge_done)
ret = ext4_ext_try_to_merge_right(inode, path, ex);
(void) ext4_ext_try_to_merge_right(inode, path, ex);
return ret;
ext4_ext_try_to_merge_up(handle, inode, path);
}
/*
@ -1893,7 +1937,7 @@ has_space:
merge:
/* try to merge extents */
if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(inode, path, nearex);
ext4_ext_try_to_merge(handle, inode, path, nearex);
/* time to correct all indexes above */
@ -1901,7 +1945,7 @@ merge:
if (err)
goto cleanup;
err = ext4_ext_dirty(handle, inode, path + depth);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
cleanup:
if (npath) {
@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
}
/*
* ext4_ext_check_cache()
* ext4_ext_in_cache()
* Checks to see if the given block is in the cache.
* If it is, the cached extent is stored in the given
* cache extent pointer. If the cached extent is a hole,
* this routine should be used instead of
* ext4_ext_in_cache if the calling function needs to
* know the size of the hole.
* cache extent pointer.
*
* @inode: The files inode
* @block: The block to look for in the cache
@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
*
* Return 0 if cache is invalid; 1 if the cache is valid
*/
static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_ext_cache *ex){
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_extent *ex)
{
struct ext4_ext_cache *cex;
struct ext4_sb_info *sbi;
int ret = 0;
@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
goto errout;
if (in_range(block, cex->ec_block, cex->ec_len)) {
memcpy(ex, cex, sizeof(struct ext4_ext_cache));
ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start);
ex->ee_len = cpu_to_le16(cex->ec_len);
ext_debug("%u cached by %u:%u:%llu\n",
block,
cex->ec_block, cex->ec_len, cex->ec_start);
@ -2137,37 +2182,6 @@ errout:
return ret;
}
/*
* ext4_ext_in_cache()
* Checks to see if the given block is in the cache.
* If it is, the cached extent is stored in the given
* extent pointer.
*
* @inode: The files inode
* @block: The block to look for in the cache
* @ex: Pointer where the cached extent will be stored
* if it contains block
*
* Return 0 if cache is invalid; 1 if the cache is valid
*/
static int
ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_extent *ex)
{
struct ext4_ext_cache cex;
int ret = 0;
if (ext4_ext_check_cache(inode, block, &cex)) {
ex->ee_block = cpu_to_le32(cex.ec_block);
ext4_ext_store_pblock(ex, cex.ec_start);
ex->ee_len = cpu_to_le16(cex.ec_len);
ret = 1;
}
return ret;
}
/*
* ext4_ext_rm_idx:
* removes index from the index block.
@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned short ee_len = ext4_ext_get_actual_len(ex);
ext4_fsblk_t pblk;
int flags = EXT4_FREE_BLOCKS_FORGET;
int flags = 0;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
flags |= EXT4_FREE_BLOCKS_METADATA;
flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
/*
* For bigalloc file systems, we never free a partial cluster
* at the beginning of the extent. Instead, we make a note
@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
struct ext4_ext_path *path = NULL;
ext4_fsblk_t partial_cluster = 0;
handle_t *handle;
int i = 0, err;
int i = 0, err = 0;
ext_debug("truncate since %u to %u\n", start, end);
@ -2604,12 +2621,16 @@ again:
return PTR_ERR(path);
}
depth = ext_depth(inode);
/* Leaf not may not exist only if inode has no blocks at all */
ex = path[depth].p_ext;
if (!ex) {
ext4_ext_drop_refs(path);
kfree(path);
path = NULL;
goto cont;
if (depth) {
EXT4_ERROR_INODE(inode,
"path[%d].p_hdr == NULL",
depth);
err = -EIO;
}
goto out;
}
ee_block = le32_to_cpu(ex->ee_block);
@ -2641,8 +2662,6 @@ again:
goto out;
}
}
cont:
/*
* We start scanning from right side, freeing all the blocks
* after i_size and walking into the tree depth-wise.
@ -2924,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_initialized(ex);
if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(inode, path, ex);
ext4_ext_try_to_merge(handle, inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@ -2958,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
goto fix_extent_len;
/* update the extent length and mark as initialized */
ex->ee_len = cpu_to_le16(ee_len);
ext4_ext_try_to_merge(inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
ext4_ext_try_to_merge(handle, inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
} else if (err)
goto fix_extent_len;
@ -3041,7 +3060,6 @@ out:
return err ? err : map->m_len;
}
#define EXT4_EXT_ZERO_LEN 7
/*
* This function is called by ext4_ext_map_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@ -3067,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_map_blocks *map,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi;
struct ext4_extent_header *eh;
struct ext4_map_blocks split_map;
struct ext4_extent zero_ex;
struct ext4_extent *ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth;
int allocated;
int allocated, max_zeroout = 0;
int err = 0;
int split_flag = 0;
@ -3081,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
"block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)map->m_lblk, map->m_len);
sbi = EXT4_SB(inode->i_sb);
eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
@ -3180,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/
split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
(EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (EXT4_EXT_MAY_ZEROOUT & split_flag)
max_zeroout = sbi->s_extent_max_zeroout_kb >>
inode->i_sb->s_blocksize_bits;
/* If extent is less than s_max_zeroout_kb, zeroout directly */
if (max_zeroout && (ee_len <= max_zeroout)) {
err = ext4_ext_zeroout(inode, ex);
if (err)
goto out;
@ -3191,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
if (err)
goto out;
ext4_ext_mark_initialized(ex);
ext4_ext_try_to_merge(inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
ext4_ext_try_to_merge(handle, inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
goto out;
}
@ -3206,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
split_map.m_lblk = map->m_lblk;
split_map.m_len = map->m_len;
if (allocated > map->m_len) {
if (allocated <= EXT4_EXT_ZERO_LEN &&
(EXT4_EXT_MAY_ZEROOUT & split_flag)) {
if (max_zeroout && (allocated > map->m_len)) {
if (allocated <= max_zeroout) {
/* case 3 */
zero_ex.ee_block =
cpu_to_le32(map->m_lblk);
@ -3220,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
split_map.m_lblk = map->m_lblk;
split_map.m_len = allocated;
} else if ((map->m_lblk - ee_block + map->m_len <
EXT4_EXT_ZERO_LEN) &&
(EXT4_EXT_MAY_ZEROOUT & split_flag)) {
} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
/* case 2 */
if (map->m_lblk != ee_block) {
zero_ex.ee_block = ex->ee_block;
@ -3242,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
}
allocated = ext4_split_extent(handle, inode, path,
&split_map, split_flag, 0);
&split_map, split_flag, 0);
if (allocated < 0)
err = allocated;
@ -3256,7 +3276,7 @@ out:
* to an uninitialized extent.
*
* Writing to an uninitialized extent may result in splitting the uninitialized
* extent into multiple /initialized uninitialized extents (up to three)
* extent into multiple initialized/uninitialized extents (up to three)
* There are three possibilities:
* a> There is no split required: Entire extent should be uninitialized
* b> Splits in two extents: Write is happening at either end of the extent
@ -3333,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
/* note: ext4_ext_correct_indexes() isn't needed here because
* borders are not changed
*/
ext4_ext_try_to_merge(inode, path, ex);
ext4_ext_try_to_merge(handle, inode, path, ex);
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + depth);
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
out:
ext4_ext_show_leaf(inode, path);
return err;
@ -3600,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
{
int ret = 0;
int err = 0;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
ext4_io_end_t *io = ext4_inode_aio(inode);
ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
@ -3615,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
ret = ext4_split_unwritten_extents(handle, inode, map,
path, flags);
if (ret <= 0)
goto out;
/*
* Flag the inode(non aio case) or end_io struct (aio case)
* that this IO needs to conversion to written when IO is
@ -3858,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
int set_unwritten = 0;
ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino);
@ -4082,13 +4105,8 @@ got_allocated_blocks:
* For non asycn direct IO case, flag the inode state
* that we need to perform conversion when IO is done.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
ext4_set_io_unwritten_flag(inode, io);
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
}
if ((flags & EXT4_GET_BLOCKS_PRE_IO))
set_unwritten = 1;
if (ext4_should_dioread_nolock(inode))
map->m_flags |= EXT4_MAP_UNINIT;
}
@ -4100,6 +4118,15 @@ got_allocated_blocks:
if (!err)
err = ext4_ext_insert_extent(handle, inode, path,
&newex, flags);
if (!err && set_unwritten) {
if (io)
ext4_set_io_unwritten_flag(inode, io);
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
}
if (err && free_on_err) {
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@ -4241,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
* finish any pending end_io work so we won't run the risk of
* converting any truncated blocks to initialized later
*/
ext4_flush_completed_IO(inode);
ext4_flush_unwritten_io(inode);
/*
* probably first extent we're gonna free will be last in block
@ -4769,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
loff_t first_page_offset, last_page_offset;
int credits, err = 0;
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
err = filemap_write_and_wait_range(mapping,
offset, offset + length - 1);
if (err)
return err;
}
mutex_lock(&inode->i_mutex);
/* It's not possible punch hole on append only file */
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
err = -EPERM;
goto out_mutex;
}
if (IS_SWAPFILE(inode)) {
err = -ETXTBSY;
goto out_mutex;
}
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
return 0;
goto out_mutex;
/*
* If the hole extends beyond i_size, set the hole
@ -4789,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
first_page_offset = first_page << PAGE_CACHE_SHIFT;
last_page_offset = last_page << PAGE_CACHE_SHIFT;
/*
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
err = filemap_write_and_wait_range(mapping,
offset, offset + length - 1);
if (err)
return err;
}
/* Now release the pages */
if (last_page_offset > first_page_offset) {
truncate_pagecache_range(inode, first_page_offset,
last_page_offset - 1);
}
/* finish any pending end_io work */
ext4_flush_completed_IO(inode);
/* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode);
err = ext4_flush_unwritten_io(inode);
if (err)
goto out_dio;
inode_dio_wait(inode);
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto out_dio;
}
err = ext4_orphan_add(handle, inode);
if (err)
goto out;
/*
* Now we need to zero out the non-page-aligned data in the
@ -4903,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
up_write(&EXT4_I(inode)->i_data_sem);
out:
ext4_orphan_del(handle, inode);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
return err;
}
int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,


@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
return 0;
}
static void ext4_aiodio_wait(struct inode *inode)
void ext4_unwritten_wait(struct inode *inode)
{
wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
}
/*
@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
"performance will be poor.",
inode->i_ino, current->comm);
mutex_lock(ext4_aio_mutex(inode));
ext4_aiodio_wait(inode);
ext4_unwritten_wait(inode);
}
BUG_ON(iocb->ki_pos != pos);


@ -34,87 +34,6 @@
#include <trace/events/ext4.h>
static void dump_completed_IO(struct inode * inode)
{
#ifdef EXT4FS_DEBUG
struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1;
unsigned long flags;
if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
return;
}
ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
cur = &io->list;
before = cur->prev;
io0 = container_of(before, ext4_io_end_t, list);
after = cur->next;
io1 = container_of(after, ext4_io_end_t, list);
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
io, inode->i_ino, io0, io1);
}
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
#endif
}
/*
* This function is called from ext4_sync_file().
*
* When IO is completed, the work to convert unwritten extents to
* written is queued on workqueue but may not get immediately
* scheduled. When fsync is called, we need to ensure the
* conversion is complete before fsync returns.
* The inode keeps track of a list of pending/completed IO that
* might needs to do the conversion. This function walks through
* the list and convert the related unwritten extents for completed IO
* to written.
* The function return the number of pending IOs on success.
*/
int ext4_flush_completed_IO(struct inode *inode)
{
ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret = 0;
int ret2 = 0;
dump_completed_IO(inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&ei->i_completed_io_list)){
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
list_del_init(&io->list);
io->flag |= EXT4_IO_END_IN_FSYNC;
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
*
* When ext4_sync_file() is called, run_queue() may already
* about to flush the work corresponding to this io structure.
* It will be upset if it founds the io structure related
* to the work-to-be schedule is freed.
*
* Thus we need to keep the io structure still valid here after
* conversion finished. The io structure has a flag to
* avoid double converting from both fsync and background work
* queue work.
*/
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
if (ret < 0)
ret2 = ret;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
io->flag &= ~EXT4_IO_END_IN_FSYNC;
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
}
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since
@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int ret;
int ret, err;
tid_t commit_tid;
bool needs_barrier = false;
@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (inode->i_sb->s_flags & MS_RDONLY)
goto out;
ret = ext4_flush_completed_IO(inode);
ret = ext4_flush_unwritten_io(inode);
if (ret < 0)
goto out;
@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
needs_barrier = true;
jbd2_log_start_commit(journal, commit_tid);
ret = jbd2_log_wait_commit(journal, commit_tid);
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
if (needs_barrier) {
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
if (!ret)
ret = err;
}
out:
mutex_unlock(&inode->i_mutex);
trace_ext4_sync_file_exit(inode, ret);


@ -697,6 +697,15 @@ got_group:
if (!gdp)
goto fail;
/*
* Check free inodes count before loading bitmap.
*/
if (ext4_free_inodes_count(sb, gdp) == 0) {
if (++group == ngroups)
group = 0;
continue;
}
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!inode_bitmap_bh)


@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
retry:
if (rw == READ && ext4_should_dioread_nolock(inode)) {
if (unlikely(!list_empty(&ei->i_completed_io_list))) {
if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
mutex_lock(&inode->i_mutex);
ext4_flush_completed_IO(inode);
ext4_flush_unwritten_io(inode);
mutex_unlock(&inode->i_mutex);
}
/*
* Nolock dioread optimization may be dynamically disabled
* via ext4_inode_block_unlocked_dio(). Check inode's state
* while holding extra i_dio_count ref.
*/
atomic_inc(&inode->i_dio_count);
smp_mb();
if (unlikely(ext4_test_inode_state(inode,
EXT4_STATE_DIOREAD_LOCK))) {
inode_dio_done(inode);
goto locked;
}
ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block, NULL, NULL, 0);
inode_dio_done(inode);
} else {
locked:
ret = blockdev_direct_IO(rw, iocb, inode, iov,
offset, nr_segs, ext4_get_block);


@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
err = ext4_map_blocks(handle, inode, &map,
create ? EXT4_GET_BLOCKS_CREATE : 0);
/* ensure we send some value back into *errp */
*errp = 0;
if (err < 0)
*errp = err;
if (err <= 0)
return NULL;
*errp = 0;
bh = sb_getblk(inode->i_sb, map.m_pblk);
if (!bh) {
@ -1954,9 +1956,6 @@ out:
return ret;
}
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
/*
* Note that we don't need to start a transaction unless we're journaling data
* because we should have holes filled from ext4_page_mkwrite(). We even don't
@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb)
free_blocks = EXT4_C2B(sbi,
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
/*
* Start pushing delalloc when 1/2 of free blocks are dirty.
*/
if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
!writeback_in_progress(sb->s_bdi) &&
down_read_trylock(&sb->s_umount)) {
writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
up_read(&sb->s_umount);
}
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
/*
@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb)
*/
return 1;
}
/*
* Even if we don't switch but are nearing capacity,
* start pushing delalloc when 1/2 of free blocks are dirty.
*/
if (free_blocks < 2 * dirty_blocks)
writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
return 0;
}
@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq;
unsigned long flags;
struct ext4_inode_info *ei;
/* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size)
@ -2910,24 +2909,14 @@ out:
io_end->iocb = iocb;
io_end->result = ret;
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
ext4_add_complete_io(io_end);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
{
ext4_io_end_t *io_end = bh->b_private;
struct workqueue_struct *wq;
struct inode *inode;
unsigned long flags;
if (!test_clear_buffer_uninit(bh) || !io_end)
goto out;
@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
*/
inode = io_end->inode;
ext4_set_io_unwritten_flag(inode, io_end);
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
ext4_add_complete_io(io_end);
out:
bh->b_private = NULL;
bh->b_end_io = NULL;
@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
overwrite = *((int *)iocb->private);
if (overwrite) {
atomic_inc(&inode->i_dio_count);
down_read(&EXT4_I(inode)->i_data_sem);
mutex_unlock(&inode->i_mutex);
}
@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
* hook to the iocb.
*/
iocb->private = NULL;
EXT4_I(inode)->cur_aio_dio = NULL;
ext4_inode_aio_set(inode, NULL);
if (!is_sync_kiocb(iocb)) {
ext4_io_end_t *io_end =
ext4_init_io_end(inode, GFP_NOFS);
@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
* is a unwritten extents needs to be converted
* when IO is completed.
*/
EXT4_I(inode)->cur_aio_dio = iocb->private;
ext4_inode_aio_set(inode, io_end);
}
if (overwrite)
@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
NULL,
DIO_LOCKING);
if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL;
ext4_inode_aio_set(inode, NULL);
/*
* The io_end structure takes a reference to the inode,
* that structure needs to be destroyed and the
@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
retake_lock:
/* take i_mutex locking again if we do a ovewrite dio */
if (overwrite) {
inode_dio_done(inode);
up_read(&EXT4_I(inode)->i_data_sem);
mutex_lock(&inode->i_mutex);
}
@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle,
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
int need_datasync = 0;
uid_t i_uid;
gid_t i_gid;
@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
ext4_isize_set(raw_inode, ei->i_disksize);
if (ei->i_disksize != ext4_isize(raw_inode)) {
ext4_isize_set(raw_inode, ei->i_disksize);
need_datasync = 1;
}
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle,
err = rc;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
ext4_update_inode_fsync_trans(handle, inode, 0);
ext4_update_inode_fsync_trans(handle, inode, need_datasync);
out_brelse:
brelse(bh);
ext4_std_error(inode->i_sb, err);
@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
inode_dio_wait(inode);
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(inode))
if (attr->ia_size != i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size);
/* Inode size will be reduced, wait for dio in flight.
* Temporarily disable dioread_nolock to prevent
* livelock. */
if (orphan) {
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
ext4_inode_resume_unlocked_dio(inode);
}
}
ext4_truncate(inode);
}
@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return err;
}
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
jbd2_journal_lock_updates(journal);
/*
@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_set_aops(inode);
jbd2_journal_unlock_updates(journal);
ext4_inode_resume_unlocked_dio(inode);
/* Finally we can mark the inode as dirty. */
@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
int retries = 0;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&


@ -366,26 +366,11 @@ group_add_out:
return -EOPNOTSUPP;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_META_BG)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with meta_bg");
return -EOPNOTSUPP;
}
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
}
if (n_blocks_count > MAX_32_NUM &&
!EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_64BIT)) {
ext4_msg(sb, KERN_ERR,
"File system only supports 32-bit block numbers");
return -EOPNOTSUPP;
}
err = ext4_resize_begin(sb);
if (err)
return err;
@ -420,13 +405,6 @@ resizefs_out:
if (!blk_queue_discard(q))
return -EOPNOTSUPP;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
ext4_msg(sb, KERN_ERR,
"FITRIM not supported with bigalloc");
return -EOPNOTSUPP;
}
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;


@ -24,6 +24,7 @@
#include "ext4_jbd2.h"
#include "mballoc.h"
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <trace/events/ext4.h>
@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
}
static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
static int mb_find_extent(struct ext4_buddy *e4b, int block,
int needed, struct ext4_free_extent *ex)
{
int next = block;
int max;
int max, order;
void *buddy;
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
BUG_ON(ex == NULL);
buddy = mb_find_buddy(e4b, order, &max);
buddy = mb_find_buddy(e4b, 0, &max);
BUG_ON(buddy == NULL);
BUG_ON(block >= max);
if (mb_test_bit(block, buddy)) {
@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
return 0;
}
/* FIXME dorp order completely ? */
if (likely(order == 0)) {
/* find actual order */
order = mb_find_order_for_block(e4b, block);
block = block >> order;
}
/* find actual order */
order = mb_find_order_for_block(e4b, block);
block = block >> order;
ex->fe_len = 1 << order;
ex->fe_start = block << order;
@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
/* recheck chunk's availability - we don't know
* when it was found (within this lock-unlock
* period or not) */
max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
if (max >= gex->fe_len) {
ext4_mb_use_best_found(ac, e4b);
return;
@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
ac->ac_b_ex = ex;
@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
int max;
int err;
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
struct ext4_free_extent ex;
if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
return 0;
if (grp->bb_free == 0)
return 0;
err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
if (err)
return err;
ext4_lock_group(ac->ac_sb, group);
max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
break;
}
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
if (!mb_test_bit(i, bitmap)) {
max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
if (max >= sbi->s_stripe) {
ac->ac_found++;
ac->ac_b_ex = ex;
@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
BUG_ON(cr < 0 || cr >= 4);
free = grp->bb_free;
if (free == 0)
return 0;
if (cr <= 2 && free < ac->ac_g_ex.fe_len)
return 0;
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
int ret = ext4_mb_init_group(ac->ac_sb, group);
@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
free = grp->bb_free;
fragments = grp->bb_fragments;
if (free == 0)
return 0;
if (fragments == 0)
return 0;
@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
return cachep;
}
/*
* Allocate the top-level s_group_info array for the specified number
* of groups
*/
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned size;
struct ext4_group_info ***new_groupinfo;
size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
if (size <= sbi->s_group_info_size)
return 0;
size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
if (!new_groupinfo) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
}
if (sbi->s_group_info) {
memcpy(new_groupinfo, sbi->s_group_info,
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
ext4_kvfree(sbi->s_group_info);
}
sbi->s_group_info = new_groupinfo;
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
sbi->s_group_info_size);
return 0;
}
/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc)
@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
goto exit_group_info;
}
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state));
@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb)
ext4_group_t ngroups = ext4_get_groups_count(sb);
ext4_group_t i;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
int num_meta_group_infos;
int num_meta_group_infos_max;
int array_size;
int err;
struct ext4_group_desc *desc;
struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
err = ext4_mb_alloc_groupinfo(sb, ngroups);
if (err)
return err;
/*
* This is the total number of blocks used by GDT including
* the number of reserved blocks for GDT.
* The s_group_info array is allocated with this value
* to allow a clean online resize without a complex
* manipulation of pointer.
* The drawback is the unused memory when no resize
* occurs but it's very low in terms of pages
* (see comments below)
* Need to handle this properly when META_BG resizing is allowed
*/
num_meta_group_infos_max = num_meta_group_infos +
le16_to_cpu(es->s_reserved_gdt_blocks);
/*
* array_size is the size of s_group_info array. We round it
* to the next power of two because this approximation is done
* internally by kmalloc so we can have some more memory
* for free here (e.g. may be used for META_BG resize).
*/
array_size = 1;
while (array_size < sizeof(*sbi->s_group_info) *
num_meta_group_infos_max)
array_size = array_size << 1;
/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
* kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
* So a two level scheme suffices for now. */
sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
if (sbi->s_group_info == NULL) {
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
}
sbi->s_buddy_cache = new_inode(sb);
if (sbi->s_buddy_cache == NULL) {
ext4_msg(sb, KERN_ERR, "can't get new inode");
@ -2322,7 +2323,7 @@ err_freebuddy:
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0)
kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos;
i = sbi->s_group_info_size;
while (i-- > 0)
kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache);
@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
memset(ac, 0, sizeof(struct ext4_allocation_context));
ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
}
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) {
ar->len = 0;
*errp = -ENOMEM;
@ -4657,6 +4657,8 @@ do_more:
* with group lock held. generate_buddy look at
* them with group lock_held
*/
if (test_opt(sb, DISCARD))
ext4_issue_discard(sb, block_group, bit, count);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
mb_free_blocks(inode, &e4b, bit, count_clusters);
@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
start = range->start >> sb->s_blocksize_bits;
end = start + (range->len >> sb->s_blocksize_bits) - 1;
minlen = range->minlen >> sb->s_blocksize_bits;
minlen = EXT4_NUM_B2C(EXT4_SB(sb),
range->minlen >> sb->s_blocksize_bits);
if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
unlikely(start >= max_blks))
@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
out:
range->len = trimmed * sb->s_blocksize;
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
return ret;
}


@ -64,11 +64,6 @@ extern u8 mb_enable_debug;
*/
#define MB_DEFAULT_MIN_TO_SCAN 10
/*
* How many groups mballoc will scan looking for the best chunk
*/
#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
/*
* with 'ext4_mb_stats' allocator will collect stats that will be
* shown at umount. The collecting costs though!


@ -140,56 +140,22 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
return 1;
}
/**
* mext_check_null_inode - NULL check for two inodes
*
* If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
*/
static int
mext_check_null_inode(struct inode *inode1, struct inode *inode2,
const char *function, unsigned int line)
{
int ret = 0;
if (inode1 == NULL) {
__ext4_error(inode2->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 NULL inode2 %lu", inode2->i_ino);
ret = -EIO;
} else if (inode2 == NULL) {
__ext4_error(inode1->i_sb, function, line,
"Both inodes should not be NULL: "
"inode1 %lu inode2 NULL", inode1->i_ino);
ret = -EIO;
}
return ret;
}
/**
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire write lock of i_data_sem of the two inodes (orig and donor) by
* i_ino order.
* Acquire write lock of i_data_sem of the two inodes
*/
static void
double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
double_down_write_data_sem(struct inode *first, struct inode *second)
{
struct inode *first = orig_inode, *second = donor_inode;
if (first < second) {
down_write(&EXT4_I(first)->i_data_sem);
down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
} else {
down_write(&EXT4_I(second)->i_data_sem);
down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_write(&EXT4_I(first)->i_data_sem);
down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
}
/**
@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
tmp_dext->ee_block =
cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
le32_add_cpu(&tmp_dext->ee_block, diff);
le16_add_cpu(&tmp_dext->ee_len, -diff);
if (max_count < ext4_ext_get_actual_len(tmp_dext))
tmp_dext->ee_len = cpu_to_le16(max_count);
@ -628,6 +593,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
return 0;
}
/**
* mext_check_coverage - Check that all extents in range has the same type
*
* @inode: inode in question
* @from: block offset of inode
* @count: block count to be checked
* @uninit: extents expected to be uninitialized
* @err: pointer to save error value
*
* Return 1 if all extents in range has expected type, and zero otherwise.
*/
static int
mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
int uninit, int *err)
{
struct ext4_ext_path *path = NULL;
struct ext4_extent *ext;
ext4_lblk_t last = from + count;
while (from < last) {
*err = get_ext_path(inode, from, &path);
if (*err)
return 0;
ext = path[ext_depth(inode)].p_ext;
if (!ext) {
ext4_ext_drop_refs(path);
return 0;
}
if (uninit != ext4_ext_is_uninitialized(ext)) {
ext4_ext_drop_refs(path);
return 0;
}
from += ext4_ext_get_actual_len(ext);
ext4_ext_drop_refs(path);
}
return 1;
}
/**
* mext_replace_branches - Replace original extents with new extents
*
@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
int replaced_count = 0;
int dext_alen;
/* Protect extent trees against block allocations via delalloc */
double_down_write_data_sem(orig_inode, donor_inode);
/* Get the original extent for the block "orig_off" */
*err = get_ext_path(orig_inode, orig_off, &orig_path);
if (*err)
@ -764,11 +763,121 @@ out:
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
double_up_write_data_sem(orig_inode, donor_inode);
return replaced_count;
}
/**
* mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
*
* @inode1: the inode structure
* @inode2: the inode structure
* @index: page index
* @page: result page vector
*
* Grab two locked pages for inode's by inode order
*/
static int
mext_page_double_lock(struct inode *inode1, struct inode *inode2,
pgoff_t index, struct page *page[2])
{
struct address_space *mapping[2];
unsigned fl = AOP_FLAG_NOFS;
BUG_ON(!inode1 || !inode2);
if (inode1 < inode2) {
mapping[0] = inode1->i_mapping;
mapping[1] = inode2->i_mapping;
} else {
mapping[0] = inode2->i_mapping;
mapping[1] = inode1->i_mapping;
}
page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
if (!page[0])
return -ENOMEM;
page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
if (!page[1]) {
unlock_page(page[0]);
page_cache_release(page[0]);
return -ENOMEM;
}
if (inode1 > inode2) {
struct page *tmp;
tmp = page[0];
page[0] = page[1];
page[1] = tmp;
}
return 0;
}
/* Force page buffers uptodate w/o dropping page's lock */
static int
mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
sector_t block;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
unsigned int blocksize, block_start, block_end;
int i, err, nr = 0, partial = 0;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (PageUptodate(page))
return 0;
blocksize = 1 << inode->i_blkbits;
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
continue;
}
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
int err = 0;
err = ext4_get_block(inode, block, bh, 0);
if (err) {
SetPageError(page);
return err;
}
if (!buffer_mapped(bh)) {
zero_user(page, block_start, blocksize);
if (!err)
set_buffer_uptodate(bh);
continue;
}
}
BUG_ON(nr >= MAX_BUF_PER_PAGE);
arr[nr++] = bh;
}
/* No io required */
if (!nr)
goto out;
for (i = 0; i < nr; i++) {
bh = arr[i];
if (!bh_uptodate_or_lock(bh)) {
err = bh_submit_read(bh);
if (err)
return err;
}
}
out:
if (!partial)
SetPageUptodate(page);
return 0;
}
/**
* move_extent_per_page - Move extent data per page
*
@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
int block_len_in_page, int uninit, int *err)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct address_space *mapping = orig_inode->i_mapping;
struct buffer_head *bh;
struct page *page = NULL;
const struct address_space_operations *a_ops = mapping->a_ops;
struct page *pagep[2] = {NULL, NULL};
handle_t *handle;
ext4_lblk_t orig_blk_offset;
long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
void *fsdata;
int i, jblocks;
int err2 = 0;
int err2, jblocks, retries = 0;
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
/*
* Twice the usual number of journal buffers is needed because the
* orig and donor inodes may each modify different metadata blocks.
*/
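/*
 * On -EBUSY the whole page move is retried from "again:"; see the
 * bottom of this function, where a transaction commit is forced to
 * unpin the busy buffer before retrying.
 */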
again:
*err = 0;
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
handle = ext4_journal_start(orig_inode, jblocks);
if (IS_ERR(handle)) {
@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
/*
* If orig extent is uninitialized one,
* it's not necessary force the page into memory
* and then force it to be written out again.
* Just swap data blocks between orig and donor.
*/
if (uninit) {
replaced_count = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page, err);
goto out2;
}
offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
/* Calculate data_size */
@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
replaced_size = data_size;
*err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
&page, &fsdata);
*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
pagep);
if (unlikely(*err < 0))
goto out;
goto stop_journal;
/*
* If the orig extent was uninitialized it can become initialized
* at any time after i_data_sem was dropped.  In order to
* serialize with delalloc we have to recheck the extent while we
* hold the page's lock; if it is still uninitialized, no data copy
* is necessary, just swap data blocks between orig and donor.
*/
if (uninit) {
double_down_write_data_sem(orig_inode, donor_inode);
/* If any of the extents in range became initialized we have to
* fall back to data copying */
uninit = mext_check_coverage(orig_inode, orig_blk_offset,
block_len_in_page, 1, err);
if (*err)
goto drop_data_sem;
if (!PageUptodate(page)) {
mapping->a_ops->readpage(o_filp, page);
lock_page(page);
uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
block_len_in_page, 1, err);
if (*err)
goto drop_data_sem;
if (!uninit) {
double_up_write_data_sem(orig_inode, donor_inode);
goto data_copy;
}
if ((page_has_private(pagep[0]) &&
!try_to_release_page(pagep[0], 0)) ||
(page_has_private(pagep[1]) &&
!try_to_release_page(pagep[1], 0))) {
*err = -EBUSY;
goto drop_data_sem;
}
replaced_count = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page, err);
drop_data_sem:
double_up_write_data_sem(orig_inode, donor_inode);
goto unlock_pages;
}
data_copy:
*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
if (*err)
goto unlock_pages;
/* At this point all buffers in range are uptodate, old mapping layout
* is no longer required, try to drop it now. */
if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
(page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
*err = -EBUSY;
goto unlock_pages;
}
/*
* try_to_release_page() doesn't call releasepage in writeback mode.
* We should care about the order of writing to the same file
* by multiple move extent processes.
* It needs to call wait_on_page_writeback() to wait for the
* writeback of the page.
*/
wait_on_page_writeback(page);
/* Release old bh and drop refs */
try_to_release_page(page, 0);
replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset, block_len_in_page,
&err2);
if (err2) {
orig_blk_offset,
block_len_in_page, err);
if (*err) {
if (replaced_count) {
block_len_in_page = replaced_count;
replaced_size =
block_len_in_page << orig_inode->i_blkbits;
} else
goto out;
goto unlock_pages;
}
/* Perform all the necessary steps similar to write_begin()/write_end()
* but keep in mind that i_size will not change */
*err = __block_write_begin(pagep[0], from, from + replaced_size,
ext4_get_block);
if (!*err)
*err = block_commit_write(pagep[0], from, from + replaced_size);
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
if (unlikely(*err < 0))
goto repair_branches;
bh = page_buffers(page);
for (i = 0; i < data_offset_in_page; i++)
bh = bh->b_this_page;
/* Even in the data=writeback case it is reasonable to pin the
* inode to the transaction, to prevent unexpected data loss */
*err = ext4_jbd2_file_inode(handle, orig_inode);
for (i = 0; i < block_len_in_page; i++) {
*err = ext4_get_block(orig_inode,
(sector_t)(orig_blk_offset + i), bh, 0);
if (*err < 0)
goto out;
if (bh->b_this_page != NULL)
bh = bh->b_this_page;
}
*err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
page, fsdata);
page = NULL;
out:
if (unlikely(page)) {
if (PageLocked(page))
unlock_page(page);
page_cache_release(page);
ext4_journal_stop(handle);
}
out2:
unlock_pages:
unlock_page(pagep[0]);
page_cache_release(pagep[0]);
unlock_page(pagep[1]);
page_cache_release(pagep[1]);
stop_journal:
ext4_journal_stop(handle);
if (err2)
*err = err2;
/* The buffer was busy, probably because it is pinned to a journal
* transaction; forcing a transaction commit may help to free it. */
if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
&retries))
goto again;
return replaced_count;
repair_branches:
/*
* This should never ever happen!
* Extents have already been swapped, but we were not able to copy the data.
* Try to swap the extents back to their original places.
*/
double_down_write_data_sem(orig_inode, donor_inode);
replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
orig_blk_offset,
block_len_in_page, &err2);
double_up_write_data_sem(orig_inode, donor_inode);
if (replaced_count != block_len_in_page) {
EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
"Unable to copy data block,"
" data will be lost.");
*err = -EIO;
}
replaced_count = 0;
goto unlock_pages;
}
/**
@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
/* Files should be in the same ext4 FS */
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
"should be in same FS [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent supports only extent based file */
if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
ext4_debug("ext4 move extent: orig file is not extents "
@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode,
}
if ((orig_start >= EXT_MAX_BLOCKS) ||
(donor_start >= EXT_MAX_BLOCKS) ||
(*len > EXT_MAX_BLOCKS) ||
(orig_start + *len >= EXT_MAX_BLOCKS)) {
ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode,
* @inode1: the inode structure
* @inode2: the inode structure
*
* Lock two inodes' i_mutex by i_ino order.
* If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
* Lock two inodes' i_mutex in inode address order
*/
static int
static void
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
{
int ret = 0;
BUG_ON(inode1 == NULL && inode2 == NULL);
ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
if (inode1 == inode2) {
mutex_lock(&inode1->i_mutex);
goto out;
}
if (inode1->i_ino < inode2->i_ino) {
BUG_ON(inode1 == inode2);
if (inode1 < inode2) {
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
}
out:
return ret;
}
/**
@ -1109,28 +1223,13 @@ out:
* @inode1: the inode that is released first
* @inode2: the inode that is released second
*
* If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
*/
static int
static void
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
{
int ret = 0;
BUG_ON(inode1 == NULL && inode2 == NULL);
ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0)
goto out;
if (inode1)
mutex_unlock(&inode1->i_mutex);
if (inode2 && inode2 != inode1)
mutex_unlock(&inode2->i_mutex);
out:
return ret;
mutex_unlock(&inode1->i_mutex);
mutex_unlock(&inode2->i_mutex);
}
/**
@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
ext4_lblk_t rest_blocks;
pgoff_t orig_page_offset = 0, seq_end_page;
int ret1, ret2, depth, last_extent = 0;
int ret, depth, last_extent = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
int data_offset_in_page;
int block_len_in_page;
int uninit;
/* orig and donor should be different file */
if (orig_inode->i_ino == donor_inode->i_ino) {
if (orig_inode->i_sb != donor_inode->i_sb) {
ext4_debug("ext4 move extent: The argument files "
"should be in same FS [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* orig and donor should be different inodes */
if (orig_inode == donor_inode) {
ext4_debug("ext4 move extent: The argument files should not "
"be same file [ino:orig %lu, donor %lu]\n",
"be same inode [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* TODO: it is not obvious how to swap blocks for inodes with full
data journaling enabled */
if (ext4_should_journal_data(orig_inode) ||
ext4_should_journal_data(donor_inode)) {
return -EINVAL;
}
/* Protect orig and donor inodes against a truncate */
ret1 = mext_inode_double_lock(orig_inode, donor_inode);
if (ret1 < 0)
return ret1;
mext_inode_double_lock(orig_inode, donor_inode);
/* Wait for all existing dio workers */
ext4_inode_block_unlocked_dio(orig_inode);
ext4_inode_block_unlocked_dio(donor_inode);
inode_dio_wait(orig_inode);
inode_dio_wait(donor_inode);
/* Protect extent tree against block allocations via delalloc */
double_down_write_data_sem(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len);
if (ret1)
if (ret)
goto out;
file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
if (file_end < block_end)
len -= block_end - file_end;
ret1 = get_ext_path(orig_inode, block_start, &orig_path);
if (ret1)
ret = get_ext_path(orig_inode, block_start, &orig_path);
if (ret)
goto out;
/* Get path structure to check the hole */
ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
if (ret1)
ret = get_ext_path(orig_inode, block_start, &holecheck_path);
if (ret)
goto out;
depth = ext_depth(orig_inode);
@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
last_extent = mext_next_extent(orig_inode,
holecheck_path, &ext_cur);
if (last_extent < 0) {
ret1 = last_extent;
ret = last_extent;
goto out;
}
last_extent = mext_next_extent(orig_inode, orig_path,
&ext_dummy);
if (last_extent < 0) {
ret1 = last_extent;
ret = last_extent;
goto out;
}
seq_start = le32_to_cpu(ext_cur->ee_block);
@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
if (le32_to_cpu(ext_cur->ee_block) > block_end) {
ext4_debug("ext4 move extent: The specified range of file "
"may be the hole\n");
ret1 = -EINVAL;
ret = -EINVAL;
goto out;
}
@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
last_extent = mext_next_extent(orig_inode, holecheck_path,
&ext_cur);
if (last_extent < 0) {
ret1 = last_extent;
ret = last_extent;
break;
}
add_blocks = ext4_ext_get_actual_len(ext_cur);
@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
orig_page_offset,
data_offset_in_page,
block_len_in_page, uninit,
&ret1);
&ret);
/* Count how many blocks we have exchanged */
*moved_len += block_len_in_page;
if (ret1 < 0)
if (ret < 0)
break;
if (*moved_len > len) {
EXT4_ERROR_INODE(orig_inode,
"We replaced blocks too much! "
"sum of replaced: %llu requested: %llu",
*moved_len, len);
ret1 = -EIO;
ret = -EIO;
break;
}
@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
}
double_down_write_data_sem(orig_inode, donor_inode);
if (ret1 < 0)
if (ret < 0)
break;
/* Decrease buffer counter */
if (holecheck_path)
ext4_ext_drop_refs(holecheck_path);
ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
if (ret1)
ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
if (ret)
break;
depth = holecheck_path->p_depth;
/* Decrease buffer counter */
if (orig_path)
ext4_ext_drop_refs(orig_path);
ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
if (ret1)
ret = get_ext_path(orig_inode, seq_start, &orig_path);
if (ret)
break;
ext_cur = holecheck_path[depth].p_ext;
@ -1412,12 +1527,9 @@ out:
kfree(holecheck_path);
}
double_up_write_data_sem(orig_inode, donor_inode);
ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
ext4_inode_resume_unlocked_dio(orig_inode);
ext4_inode_resume_unlocked_dio(donor_inode);
mext_inode_double_unlock(orig_inode, donor_inode);
if (ret1)
return ret1;
else if (ret2)
return ret2;
return 0;
return ret;
}


@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle,
{
struct buffer_head *bh;
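/*
 * Refuse to grow the directory once it has reached the
 * administrator-set max_dir_size_kb limit (0 disables the limit).
 */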
if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
((inode->i_size >> 10) >=
EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
*err = -ENOSPC;
return NULL;
}
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
bh = ext4_bread(handle, inode, *block, 1, err);
@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle,
bh = NULL;
}
}
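/*
 * ext4_bread() returning NULL without an error means the logical
 * block is a hole; a hole in a directory is treated as on-disk
 * corruption and reported as -EIO.
 */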
if (!bh && !(*err)) {
*err = -EIO;
ext4_error(inode->i_sb,
"Directory hole detected on inode %lu\n",
inode->i_ino);
}
return bh;
}
@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
u32 hash;
frame->bh = NULL;
if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
if (*err == 0)
*err = ERR_BAD_DX_DIR;
goto fail;
}
root = (struct dx_root *) bh->b_data;
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
frame->entries = entries;
frame->at = at;
if (!indirect--) return frame;
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
if (!(*err))
*err = ERR_BAD_DX_DIR;
goto fail2;
}
at = entries = ((struct dx_node *) bh->b_data)->entries;
if (!buffer_verified(bh) &&
@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
*/
while (num_frames--) {
if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
0, &err)))
0, &err))) {
if (!err) {
ext4_error(dir->i_sb,
"Directory hole detected on inode %lu\n",
dir->i_ino);
return -EIO;
}
return err; /* Failure */
}
if (!buffer_verified(bh) &&
!ext4_dx_csum_verify(dir,
@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file,
{
struct buffer_head *bh;
struct ext4_dir_entry_2 *de, *top;
int err, count = 0;
int err = 0, count = 0;
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
if (!err) {
err = -EIO;
ext4_error(dir->i_sb,
"Directory hole detected on inode %lu\n",
dir->i_ino);
}
return err;
}
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
return NULL;
do {
block = dx_get_block(frame->at);
if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
if (!(*err)) {
*err = -EIO;
ext4_error(dir->i_sb,
"Directory hole detected on inode %lu\n",
dir->i_ino);
}
goto errout;
}
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(dir,
@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
blocks = dir->i_size >> sb->s_blocksize_bits;
for (block = 0; block < blocks; block++) {
bh = ext4_bread(handle, dir, block, 0, &retval);
if(!bh)
if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
if (!retval) {
retval = -EIO;
ext4_error(inode->i_sb,
"Directory hole detected on inode %lu\n",
inode->i_ino);
}
return retval;
}
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(dir,
(struct ext4_dir_entry *)bh->b_data))
@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
entries = frame->entries;
at = frame->at;
if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
if (!err) {
err = -EIO;
ext4_error(dir->i_sb,
"Directory hole detected on inode %lu\n",
dir->i_ino);
}
goto cleanup;
}
if (!buffer_verified(bh) &&
!ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@ -2149,9 +2202,7 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
#ifdef CONFIG_EXT4_FS_XATTR
inode->i_op = &ext4_special_inode_operations;
#endif
err = ext4_add_nondir(handle, dentry, inode);
}
ext4_journal_stop(handle);
@ -2199,9 +2250,15 @@ retry:
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
dir_block = ext4_bread(handle, inode, 0, 1, &err);
if (!dir_block)
if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
if (!err) {
err = -EIO;
ext4_error(inode->i_sb,
"Directory hole detected on inode %lu\n",
inode->i_ino);
}
goto out_clear_inode;
}
BUFFER_TRACE(dir_block, "get_write_access");
err = ext4_journal_get_write_access(handle, dir_block);
if (err)
@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode)
EXT4_ERROR_INODE(inode,
"error %d reading directory "
"lblock %u", err, lblock);
else
ext4_warning(inode->i_sb,
"bad directory (dir #%lu) - no data block",
inode->i_ino);
offset += sb->s_blocksize;
continue;
}
@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0, rc;
if (!ext4_handle_valid(handle))
if (!EXT4_SB(sb)->s_journal)
return 0;
mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0;
/* ext4_handle_valid() assumes a valid handle_t pointer */
if (handle && !ext4_handle_valid(handle))
if (!EXT4_SB(inode->i_sb)->s_journal)
return 0;
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
* transaction handle with which to update the orphan list on
* disk, but we still need to remove the inode from the linked
* list in memory. */
if (sbi->s_journal && !handle)
if (!handle)
goto out;
err = ext4_reserve_inode_write(handle, inode, &iloc);
@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end_rename;
}
retval = -EIO;
dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
if (!dir_bh)
if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
if (!retval) {
retval = -EIO;
ext4_error(old_inode->i_sb,
"Directory hole detected on inode %lu\n",
old_inode->i_ino);
}
goto end_rename;
}
if (!buffer_verified(dir_bh) &&
!ext4_dirent_csum_verify(old_inode,
(struct ext4_dir_entry *)dir_bh->b_data))


@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io)
int i;
BUG_ON(!io);
BUG_ON(!list_empty(&io->list));
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
if (io->page)
put_page(io->page);
for (i = 0; i < io->num_io_pages; i++)
@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io)
kmem_cache_free(io_end_cachep, io);
}
/*
* check a range of space and convert unwritten extents to written.
*
* Called with inode->i_mutex; we depend on this when we manipulate
* io->flag, since we could otherwise race with ext4_flush_completed_IO()
*/
int ext4_end_io_nolock(ext4_io_end_t *io)
/* check a range of space and convert unwritten extents to written. */
static int ext4_end_io(ext4_io_end_t *io)
{
struct inode *inode = io->inode;
loff_t offset = io->offset;
@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
"(inode %lu, offset %llu, size %zd, error %d)",
inode->i_ino, offset, size, ret);
}
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
if (io->flag & EXT4_IO_END_DIRECT)
inode_dio_done(inode);
/* Wake up anyone waiting on unwritten extent conversion */
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
wake_up_all(ext4_ioend_wq(io->inode));
return ret;
}
static void dump_completed_IO(struct inode *inode)
{
#ifdef EXT4FS_DEBUG
struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1;
unsigned long flags;
if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
ext4_debug("inode %lu completed_io list is empty\n",
inode->i_ino);
return;
}
ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
cur = &io->list;
before = cur->prev;
io0 = container_of(before, ext4_io_end_t, list);
after = cur->next;
io1 = container_of(after, ext4_io_end_t, list);
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
io, inode->i_ino, io0, io1);
}
#endif
}
/* Add the io_end to per-inode completed end_io list. */
void ext4_add_complete_io(ext4_io_end_t *io_end)
{
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
struct workqueue_struct *wq;
unsigned long flags;
BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
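/*
 * Kick the workqueue only when the list goes from empty to non-empty;
 * a single queued work item drains the whole per-inode list.
 */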
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (list_empty(&ei->i_completed_io_list)) {
io_end->flag |= EXT4_IO_END_QUEUED;
queue_work(wq, &io_end->work);
}
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
}
static int ext4_do_flush_completed_IO(struct inode *inode,
ext4_io_end_t *work_io)
{
ext4_io_end_t *io;
struct list_head unwritten, complete, to_free;
unsigned long flags;
struct ext4_inode_info *ei = EXT4_I(inode);
int err, ret = 0;
INIT_LIST_HEAD(&complete);
INIT_LIST_HEAD(&to_free);
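/*
 * Snapshot the per-inode list under the lock, convert the unwritten
 * extents without holding it, then re-take the lock to clear
 * EXT4_IO_END_UNWRITTEN: io_ends still owned by a queued worker are
 * left for that worker to free, everything else goes to to_free.
 */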
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
dump_completed_IO(inode);
list_replace_init(&ei->i_completed_io_list, &unwritten);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
while (!list_empty(&unwritten)) {
io = list_entry(unwritten.next, ext4_io_end_t, list);
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
list_del_init(&io->list);
err = ext4_end_io(io);
if (unlikely(!ret && err))
ret = err;
list_add_tail(&io->list, &complete);
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
while (!list_empty(&complete)) {
io = list_entry(complete.next, ext4_io_end_t, list);
io->flag &= ~EXT4_IO_END_UNWRITTEN;
/* The end_io context cannot be destroyed now because it is still
* used by a queued worker. The worker thread will destroy it later */
if (io->flag & EXT4_IO_END_QUEUED)
list_del_init(&io->list);
else
list_move(&io->list, &to_free);
}
/* If we are called from worker context, it is time to clear the queued
* flag and to destroy its end_io if it was already converted */
if (work_io) {
work_io->flag &= ~EXT4_IO_END_QUEUED;
if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
list_add_tail(&work_io->list, &to_free);
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
while (!list_empty(&to_free)) {
io = list_entry(to_free.next, ext4_io_end_t, list);
list_del_init(&io->list);
ext4_free_io_end(io);
}
return ret;
}
/*
* work on completed AIO/DIO I/O, converting unwritten extents to written extents
*/
static void ext4_end_io_work(struct work_struct *work)
{
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
struct inode *inode = io->inode;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
ext4_do_flush_completed_IO(io->inode, io);
}
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (io->flag & EXT4_IO_END_IN_FSYNC)
goto requeue;
if (list_empty(&io->list)) {
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
goto free;
}
if (!mutex_trylock(&inode->i_mutex)) {
bool was_queued;
requeue:
was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
io->flag |= EXT4_IO_END_QUEUED;
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/*
* Requeue the work instead of waiting so that the work
* items queued after this can be processed.
*/
queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
/*
* To prevent the ext4-dio-unwritten thread from keeping
* requeueing end_io requests and occupying cpu for too long,
* yield the cpu if it sees an end_io request that has already
* been requeued.
*/
if (was_queued)
yield();
return;
}
list_del_init(&io->list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
(void) ext4_end_io_nolock(io);
mutex_unlock(&inode->i_mutex);
free:
ext4_free_io_end(io);
int ext4_flush_unwritten_io(struct inode *inode)
{
int ret;
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
!(inode->i_state & I_FREEING));
ret = ext4_do_flush_completed_IO(inode, NULL);
ext4_unwritten_wait(inode);
return ret;
}
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh)
static void ext4_end_bio(struct bio *bio, int error)
{
ext4_io_end_t *io_end = bio->bi_private;
struct workqueue_struct *wq;
struct inode *inode;
unsigned long flags;
int i;
sector_t bi_sector = bio->bi_sector;
@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
return;
}
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
ext4_add_complete_io(io_end);
}
void ext4_io_submit(struct ext4_io_submit *io)


@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb)
smp_mb__after_clear_bit();
}
static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
ext4_group_t group) {
return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
EXT4_DESC_PER_BLOCK_BITS(sb);
}
static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
ext4_group_t group) {
group = ext4_meta_bg_first_group(sb, group);
return ext4_group_first_block_no(sb, group);
}
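/*
 * Blocks at the start of @group that are unusable for data: the group
 * descriptor blocks, plus the backup superblock and the reserved GDT
 * blocks when this group carries a backup copy.
 */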
static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
ext4_group_t group) {
ext4_grpblk_t overhead;
overhead = ext4_bg_num_gdb(sb, group);
if (ext4_bg_has_super(sb, group))
overhead += 1 +
le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
return overhead;
}
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb,
ext4_fsblk_t end = start + input->blocks_count;
ext4_group_t group = input->group;
ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext4_bg_has_super(sb, group) ?
(1 + ext4_bg_num_gdb(sb, group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
unsigned overhead = ext4_group_overhead_blocks(sb, group);
ext4_fsblk_t metaend = start + overhead;
struct buffer_head *bh = NULL;
ext4_grpblk_t free_blocks_count, offset;
@ -200,13 +220,15 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
* be a partial of a flex group.
*
* @sb: super block of fs to which the groups belongs
*
* Returns 0 on a successful allocation of the metadata blocks in the
* block group.
*/
static void ext4_alloc_group_tables(struct super_block *sb,
static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_fsblk_t start_blk;
ext4_fsblk_t last_blk;
ext4_group_t src_group;
@ -226,23 +248,24 @@ static void ext4_alloc_group_tables(struct super_block *sb,
(last_group & ~(flexbg_size - 1))));
next_group:
group = group_data[0].group;
if (src_group >= group_data[0].group + flex_gd->count)
return -ENOSPC;
start_blk = ext4_group_first_block_no(sb, src_group);
last_blk = start_blk + group_data[src_group - group].blocks_count;
overhead = ext4_bg_has_super(sb, src_group) ?
(1 + ext4_bg_num_gdb(sb, src_group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
overhead = ext4_group_overhead_blocks(sb, src_group);
start_blk += overhead;
BUG_ON(src_group >= group_data[0].group + flex_gd->count);
/* We collect contiguous blocks as much as possible. */
src_group++;
for (; src_group <= last_group; src_group++)
if (!ext4_bg_has_super(sb, src_group))
for (; src_group <= last_group; src_group++) {
overhead = ext4_group_overhead_blocks(sb, src_group);
if (overhead != 0)
last_blk += group_data[src_group - group].blocks_count;
else
break;
}
/* Allocate block bitmaps */
for (; bb_index < flex_gd->count; bb_index++) {
@ -300,6 +323,7 @@ next_group:
group_data[i].free_blocks_count);
}
}
return 0;
}
static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
@ -433,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
ext4_group_t group, count;
struct buffer_head *bh = NULL;
int reserved_gdb, i, j, err = 0, err2;
int meta_bg;
BUG_ON(!flex_gd->count || !group_data ||
group_data[0].group != sbi->s_groups_count);
reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
/* This transaction may be extended/restarted along the way */
handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
@ -447,12 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
group = group_data[0].group;
for (i = 0; i < flex_gd->count; i++, group++) {
unsigned long gdblocks;
ext4_grpblk_t overhead;
gdblocks = ext4_bg_num_gdb(sb, group);
start = ext4_group_first_block_no(sb, group);
if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
goto handle_itb;
if (meta_bg == 1) {
ext4_group_t first_group;
first_group = ext4_meta_bg_first_group(sb, group);
if (first_group != group + 1 &&
first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
goto handle_itb;
}
block = start + ext4_bg_has_super(sb, group);
/* Copy all of the GDT blocks into the backup in this group */
for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
for (j = 0; j < gdblocks; j++, block++) {
struct buffer_head *gdb;
ext4_debug("update backup group %#04llx\n", block);
@ -493,6 +532,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
goto out;
}
handle_itb:
/* Initialize the group tables of group @group */
if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
goto handle_bb;
@ -521,11 +561,11 @@ handle_bb:
err = PTR_ERR(bh);
goto out;
}
if (ext4_bg_has_super(sb, group)) {
overhead = ext4_group_overhead_blocks(sb, group);
if (overhead != 0) {
ext4_debug("mark backup superblock %#04llx (+0)\n",
start);
ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
1);
ext4_set_bits(bh->b_data, 0, overhead);
}
ext4_mark_bitmap_end(group_data[i].blocks_count,
sb->s_blocksize * 8, bh->b_data);
@ -821,6 +861,45 @@ exit_bh:
return err;
}
/*
* add_new_gdb_meta_bg is the sister of add_new_gdb.
*/
static int add_new_gdb_meta_bg(struct super_block *sb,
handle_t *handle, ext4_group_t group) {
ext4_fsblk_t gdblock;
struct buffer_head *gdb_bh;
struct buffer_head **o_group_desc, **n_group_desc;
unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int err;
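/*
 * With meta_bg the descriptor block for this group lives inside the
 * meta block group itself, right after the backup superblock (if any),
 * so it only has to be read in and hooked into s_group_desc[].
 */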
gdblock = ext4_meta_bg_first_block_no(sb, group) +
ext4_bg_has_super(sb, group);
gdb_bh = sb_bread(sb, gdblock);
if (!gdb_bh)
return -EIO;
n_group_desc = ext4_kvmalloc((gdb_num + 1) *
sizeof(struct buffer_head *),
GFP_NOFS);
if (!n_group_desc) {
err = -ENOMEM;
ext4_warning(sb, "not enough memory for %lu groups",
gdb_num + 1);
return err;
}
o_group_desc = EXT4_SB(sb)->s_group_desc;
memcpy(n_group_desc, o_group_desc,
EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
n_group_desc[gdb_num] = gdb_bh;
EXT4_SB(sb)->s_group_desc = n_group_desc;
EXT4_SB(sb)->s_gdb_count++;
ext4_kvfree(o_group_desc);
err = ext4_journal_get_write_access(handle, gdb_bh);
if (unlikely(err))
brelse(gdb_bh);
return err;
}
/*
* Called when we are adding a new group which has a backup copy of each of
* the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
@ -949,16 +1028,16 @@ exit_free:
* do not copy the full number of backups at this time. The resize
* which changed s_groups_count will back up again.
*/
static void update_backups(struct super_block *sb,
int blk_off, char *data, int size)
static void update_backups(struct super_block *sb, int blk_off, char *data,
int size, int meta_bg)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
const ext4_group_t last = sbi->s_groups_count;
ext4_group_t last;
const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
unsigned three = 1;
unsigned five = 5;
unsigned seven = 7;
ext4_group_t group;
ext4_group_t group = 0;
int rest = sb->s_blocksize - size;
handle_t *handle;
int err = 0, err2;
@ -970,10 +1049,17 @@ static void update_backups(struct super_block *sb,
goto exit_err;
}
ext4_superblock_csum_set(sb, (struct ext4_super_block *)data);
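/*
 * Without meta_bg the backups follow the usual sparse_super sequence
 * from ext4_list_backups(); with meta_bg only the second and the last
 * group of the meta block group carry backup copies.
 */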
if (meta_bg == 0) {
group = ext4_list_backups(sb, &three, &five, &seven);
last = sbi->s_groups_count;
} else {
group = ext4_meta_bg_first_group(sb, group) + 1;
last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
}
while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) {
while (group < sbi->s_groups_count) {
struct buffer_head *bh;
ext4_fsblk_t backup_block;
/* Out of journal space, and can't get more - abort - so sad */
if (ext4_handle_valid(handle) &&
@ -982,13 +1068,20 @@ static void update_backups(struct super_block *sb,
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
break;
bh = sb_getblk(sb, group * bpg + blk_off);
if (meta_bg == 0)
backup_block = group * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
bh = sb_getblk(sb, backup_block);
if (!bh) {
err = -EIO;
break;
}
ext4_debug("update metadata backup %#04lx\n",
(unsigned long)bh->b_blocknr);
ext4_debug("update metadata backup %llu(+%llu)\n",
backup_block, backup_block -
ext4_group_first_block_no(sb, group));
if ((err = ext4_journal_get_write_access(handle, bh)))
break;
lock_buffer(bh);
@ -1001,6 +1094,13 @@ static void update_backups(struct super_block *sb,
if (unlikely(err))
ext4_std_error(sb, err);
brelse(bh);
if (meta_bg == 0)
group = ext4_list_backups(sb, &three, &five, &seven);
else if (group == last)
break;
else
group = last;
}
if ((err2 = ext4_journal_stop(handle)) && !err)
err = err2;
@ -1043,7 +1143,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
struct ext4_super_block *es = sbi->s_es;
struct buffer_head *gdb_bh;
int i, gdb_off, gdb_num, err = 0;
int meta_bg;
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
for (i = 0; i < count; i++, group++) {
int reserved_gdb = ext4_bg_has_super(sb, group) ?
le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
@ -1063,8 +1165,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
err = reserve_backup_gdb(handle, resize_inode, group);
} else
} else if (meta_bg != 0) {
err = add_new_gdb_meta_bg(sb, handle, group);
} else {
err = add_new_gdb(handle, resize_inode, group);
}
if (err)
break;
}
@ -1076,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
struct buffer_head *bh = sb_getblk(sb, block);
if (!bh)
return NULL;
if (bitmap_uptodate(bh))
return bh;
lock_buffer(bh);
if (bh_submit_read(bh) < 0) {
unlock_buffer(bh);
brelse(bh);
return NULL;
if (!bh_uptodate_or_lock(bh)) {
if (bh_submit_read(bh) < 0) {
brelse(bh);
return NULL;
}
}
unlock_buffer(bh);
return bh;
}
@ -1161,6 +1261,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_free_group_clusters_set(sb, gdp,
EXT4_B2C(sbi, group_data->free_blocks_count));
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
if (ext4_has_group_desc_csum(sb))
ext4_itable_unused_set(sb, gdp,
EXT4_INODES_PER_GROUP(sb));
gdp->bg_flags = cpu_to_le16(*bg_flags);
ext4_group_desc_csum_set(sb, group, gdp);
@ -1216,7 +1319,7 @@ static void ext4_update_super(struct super_block *sb,
}
reserved_blocks = ext4_r_blocks_count(es) * 100;
do_div(reserved_blocks, ext4_blocks_count(es));
reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
reserved_blocks *= blocks_count;
do_div(reserved_blocks, 100);
@ -1227,6 +1330,7 @@ static void ext4_update_super(struct super_block *sb,
le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
/*
* We need to protect s_groups_count against other CPUs seeing
* inconsistent state in the superblock.
@ -1261,6 +1365,8 @@ static void ext4_update_super(struct super_block *sb,
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
ext4_debug("free blocks count %llu",
percpu_counter_read(&sbi->s_freeclusters_counter));
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
sbi->s_log_groups_per_flex) {
@ -1349,16 +1455,24 @@ exit_journal:
err = err2;
if (!err) {
int i;
int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb,
EXT4_FEATURE_INCOMPAT_META_BG);
sector_t old_gdb = 0;
update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
for (i = 0; i < flex_gd->count; i++, group++) {
sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
int gdb_num;
gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
gdb_bh = sbi->s_group_desc[gdb_num];
if (old_gdb == gdb_bh->b_blocknr)
continue;
update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
gdb_bh->b_size);
gdb_bh->b_size, meta_bg);
old_gdb = gdb_bh->b_blocknr;
}
}
exit:
@ -1402,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
group_data[i].group = group + i;
group_data[i].blocks_count = blocks_per_group;
overhead = ext4_bg_has_super(sb, group + i) ?
(1 + ext4_bg_num_gdb(sb, group + i) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
overhead = ext4_group_overhead_blocks(sb, group + i);
group_data[i].free_blocks_count = blocks_per_group - overhead;
if (ext4_has_group_desc_csum(sb))
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
@ -1492,6 +1604,14 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
if (err)
goto out;
err = ext4_alloc_flex_bg_array(sb, input->group + 1);
if (err)
return err;
err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
if (err)
goto out;
flex_gd.count = 1;
flex_gd.groups = input;
flex_gd.bg_flags = &bg_flags;
@ -1544,11 +1664,13 @@ errout:
err = err2;
if (!err) {
ext4_fsblk_t first_block;
first_block = ext4_group_first_block_no(sb, 0);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
sizeof(struct ext4_super_block));
update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block,
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
}
@ -1631,6 +1753,94 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
return err;
} /* ext4_group_extend */
static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
{
return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
}
/*
* Release the resize inode and drop the resize_inode feature if there
* are no more reserved gdt blocks, and then convert the file system
* to enable meta_bg
*/
static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
{
handle_t *handle;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct ext4_inode_info *ei = EXT4_I(inode);
ext4_fsblk_t nr;
int i, ret, err = 0;
int credits = 1;
ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg");
if (inode) {
if (es->s_reserved_gdt_blocks) {
ext4_error(sb, "Unexpected non-zero "
"s_reserved_gdt_blocks");
return -EPERM;
}
/* Do a quick sanity check of the resize inode */
if (inode->i_blocks != 1 << (inode->i_blkbits - 9))
goto invalid_resize_inode;
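/*
 * A clean resize inode owns exactly one block: the double indirect
 * block that carries the reserved GDT blocks.  Any other populated
 * i_data slot means the inode is corrupted.
 */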
for (i = 0; i < EXT4_N_BLOCKS; i++) {
if (i == EXT4_DIND_BLOCK) {
if (ei->i_data[i])
continue;
else
goto invalid_resize_inode;
}
if (ei->i_data[i])
goto invalid_resize_inode;
}
credits += 3; /* block bitmap, bg descriptor, resize inode */
}
handle = ext4_journal_start_sb(sb, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
err = ext4_journal_get_write_access(handle, sbi->s_sbh);
if (err)
goto errout;
EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE);
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
sbi->s_es->s_first_meta_bg =
cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
err = ext4_handle_dirty_super(handle, sb);
if (err) {
ext4_std_error(sb, err);
goto errout;
}
if (inode) {
nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]);
ext4_free_blocks(handle, inode, NULL, nr, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
ei->i_data[EXT4_DIND_BLOCK] = 0;
inode->i_blocks = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err)
ext4_std_error(sb, err);
}
errout:
ret = ext4_journal_stop(handle);
if (!err)
err = ret;
return ret;
invalid_resize_inode:
ext4_error(sb, "corrupted/inconsistent resize inode");
return -EINVAL;
}
/*
* ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
*
@ -1643,21 +1853,31 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct buffer_head *bh;
struct inode *resize_inode;
ext4_fsblk_t o_blocks_count;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_grpblk_t offset, add;
struct inode *resize_inode = NULL;
ext4_grpblk_t add, offset;
unsigned long n_desc_blocks;
unsigned long o_desc_blocks;
unsigned long desc_blocks;
int err = 0, flexbg_size = 1;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0;
unsigned long last_update_time = 0;
int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
int meta_bg;
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, n_blocks_count - 1);
if (!bh) {
ext4_warning(sb, "can't read last block, resize aborted");
return -ENOSPC;
}
brelse(bh);
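/*
 * If the target size needs more group descriptor blocks than the
 * resize_inode has reserved, the resize below is done in stages: grow
 * as far as the reserved GDT blocks allow, convert the file system to
 * meta_bg, and then restart from here for the remainder
 * (n_blocks_count_retry).
 */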
retry:
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu "
"to %llu blocks", o_blocks_count, n_blocks_count);
ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu "
"to %llu blocks", o_blocks_count, n_blocks_count);
if (n_blocks_count < o_blocks_count) {
/* On-line shrinking not supported */
@ -1672,33 +1892,50 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
EXT4_DESC_PER_BLOCK(sb);
o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
desc_blocks = n_desc_blocks - o_desc_blocks;
n_desc_blocks = num_desc_blocks(sb, n_group + 1);
o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count);
if (desc_blocks &&
(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
ext4_warning(sb, "No reserved GDT blocks, can't resize");
return -EPERM;
meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) {
if (meta_bg) {
ext4_error(sb, "resize_inode and meta_bg enabled "
"simultaneously");
return -EINVAL;
}
if (n_desc_blocks > o_desc_blocks +
le16_to_cpu(es->s_reserved_gdt_blocks)) {
n_blocks_count_retry = n_blocks_count;
n_desc_blocks = o_desc_blocks +
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb);
n_group--; /* set to last group number */
}
if (!resize_inode)
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
}
}
resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
if (IS_ERR(resize_inode)) {
ext4_warning(sb, "Error opening resize inode");
return PTR_ERR(resize_inode);
if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
err = ext4_convert_meta_bg(sb, resize_inode);
if (err)
goto out;
if (resize_inode) {
iput(resize_inode);
resize_inode = NULL;
}
if (n_blocks_count_retry) {
n_blocks_count = n_blocks_count_retry;
n_blocks_count_retry = 0;
goto retry;
}
}
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, n_blocks_count - 1);
if (!bh) {
ext4_warning(sb, "can't read last block, resize aborted");
return -ENOSPC;
}
brelse(bh);
/* extend the last group */
if (n_group == o_group)
add = n_blocks_count - o_blocks_count;
@ -1710,12 +1947,15 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
goto out;
}
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
es->s_log_groups_per_flex)
flexbg_size = 1 << es->s_log_groups_per_flex;
if (ext4_blocks_count(es) == n_blocks_count)
goto out;
o_blocks_count = ext4_blocks_count(es);
if (o_blocks_count == n_blocks_count)
err = ext4_alloc_flex_bg_array(sb, n_group + 1);
if (err)
return err;
err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
if (err)
goto out;
flex_gd = alloc_flex_gd(flexbg_size);
@ -1729,19 +1969,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
*/
while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
flexbg_size)) {
ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
if (jiffies - last_update_time > HZ * 10) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
"resized to %llu blocks",
ext4_blocks_count(es));
last_update_time = jiffies;
}
if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
break;
err = ext4_flex_group_add(sb, resize_inode, flex_gd);
if (unlikely(err))
break;
}
if (!err && n_blocks_count_retry) {
n_blocks_count = n_blocks_count_retry;
n_blocks_count_retry = 0;
free_flex_gd(flex_gd);
flex_gd = NULL;
goto retry;
}
out:
if (flex_gd)
free_flex_gd(flex_gd);
iput(resize_inode);
if (test_opt(sb, DEBUG))
ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
"upto %llu blocks", o_blocks_count, n_blocks_count);
if (resize_inode != NULL)
iput(resize_inode);
ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count);
return err;
}


@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
*/
if (!es->s_error_count)
mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
le32_add_cpu(&es->s_error_count, 1);
}
static void save_error_info(struct super_block *sb, const char *func,
@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb)
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
lock_super(sb);
if (sbi->s_journal) {
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb)
* Now that we are completely done shutting down the
* superblock, we need to actually destroy the kobject.
*/
unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
if (sbi->s_chksum_driver)
@ -956,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->jinode = NULL;
INIT_LIST_HEAD(&ei->i_completed_io_list);
spin_lock_init(&ei->i_completed_io_lock);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_aiodio_unwritten, 0);
atomic_set(&ei->i_unwritten, 0);
return &ei->vfs_inode;
}
@ -1224,6 +1221,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb,
};
static const match_table_t tokens = {
@ -1297,6 +1295,7 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@ -1477,6 +1476,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_err, 0, 0}
};
@ -1592,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
if (!args->from)
arg = EXT4_DEF_LI_WAIT_MULT;
sbi->s_li_wait_mult = arg;
} else if (token == Opt_max_dir_size_kb) {
sbi->s_max_dir_size_kb = arg;
} else if (token == Opt_stripe) {
sbi->s_stripe = arg;
} else if (m->flags & MOPT_DATAJ) {
@ -1664,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb,
* Initialize args struct so we know whether arg was
* found; some options take optional arguments.
*/
args[0].to = args[0].from = 0;
args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
if (handle_mount_opt(sb, p, token, args, journal_devnum,
journal_ioprio, is_remount) < 0)
@ -1740,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
static const char *token2str(int token)
{
static const struct match_token *t;
const struct match_token *t;
for (t = tokens; t->token != Opt_err; t++)
if (t->token == token && !strchr(t->pattern, '='))
@ -1823,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
ext4_show_quota_options(seq, sb);
return 0;
@ -1914,15 +1918,45 @@ done:
return res;
}
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct flex_groups *new_groups;
int size;
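/*
 * Make sure s_flex_groups[] is large enough to describe ngroup groups.
 * The allocation is rounded up to a power of two so that repeated
 * online resizes do not have to reallocate it every time.
 */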
if (!sbi->s_log_groups_per_flex)
return 0;
size = ext4_flex_group(sbi, ngroup - 1) + 1;
if (size <= sbi->s_flex_groups_allocated)
return 0;
size = roundup_pow_of_two(size * sizeof(struct flex_groups));
new_groups = ext4_kvzalloc(size, GFP_KERNEL);
if (!new_groups) {
ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
size / (int) sizeof(struct flex_groups));
return -ENOMEM;
}
if (sbi->s_flex_groups) {
memcpy(new_groups, sbi->s_flex_groups,
(sbi->s_flex_groups_allocated *
sizeof(struct flex_groups)));
ext4_kvfree(sbi->s_flex_groups);
}
sbi->s_flex_groups = new_groups;
sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
return 0;
}
static int ext4_fill_flex_info(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_desc *gdp = NULL;
ext4_group_t flex_group_count;
ext4_group_t flex_group;
unsigned int groups_per_flex = 0;
size_t size;
int i;
int i, err;
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
@ -1931,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb)
}
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
/* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
size = flex_group_count * sizeof(struct flex_groups);
sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
if (sbi->s_flex_groups == NULL) {
ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
flex_group_count);
err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
if (err)
goto failed;
}
for (i = 0; i < sbi->s_groups_count; i++) {
gdp = ext4_get_group_desc(sb, i, NULL);
@ -2144,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
}
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
if (es->s_last_orphan)
/* don't clear list on RO mount w/ errors */
if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
jbd_debug(1, "Errors on filesystem, "
"clearing orphan list.\n");
es->s_last_orphan = 0;
es->s_last_orphan = 0;
}
jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
return;
}
@ -2528,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
static struct attribute *ext4_attrs[] = {
@ -2543,6 +2572,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
NULL,
};
@ -2550,10 +2580,12 @@ static struct attribute *ext4_attrs[] = {
/* Features this copy of ext4 supports */
EXT4_INFO_ATTR(lazy_itable_init);
EXT4_INFO_ATTR(batched_discard);
EXT4_INFO_ATTR(meta_bg_resize);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
ATTR_LIST(meta_bg_resize),
NULL,
};
@ -3374,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* enable delayed allocation by default
* Use -o nodelalloc to turn it off
*/
if (!IS_EXT3_SB(sb) &&
if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sb, DELALLOC);
@ -3743,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_stripe = ext4_get_stripe_size(sbi);
sbi->s_max_writeback_mb_bump = 128;
sbi->s_extent_max_zeroout_kb = 32;
/*
* set up enough so that it can read an inode
@ -4519,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
lock_super(sb);
/* Reset the needs_recovery flag before the fs is unlocked. */
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
ext4_commit_super(sb, 1);
unlock_super(sb);
return 0;
}
@ -4559,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
char *orig_data = kstrdup(data, GFP_KERNEL);
/* Store the original options */
lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_mount_opt2 = sbi->s_mount_opt2;
@ -4701,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
unlock_super(sb);
#ifdef CONFIG_QUOTA
/* Release old quota file names */
for (i = 0; i < MAXQUOTAS; i++)
@ -4714,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_QUOTA)) {
err = ext4_enable_quotas(sb);
if (err) {
lock_super(sb);
if (err)
goto restore_opts;
}
}
}
#endif
@ -4744,7 +4771,6 @@ restore_opts:
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
#endif
unlock_super(sb);
kfree(orig_data);
return err;
}
@ -5269,8 +5295,10 @@ static int __init ext4_init_fs(void)
if (err)
goto out6;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
if (!ext4_kset) {
err = -ENOMEM;
goto out5;
}
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
err = ext4_init_feat_adverts();


@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
{
return test_bit(BDI_writeback_running, &bdi->state);
}
EXPORT_SYMBOL(writeback_in_progress);
static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{


@ -1014,17 +1014,35 @@ restart_loop:
* there's no point in keeping a checkpoint record for
* it. */
/* A buffer which has been freed while still being
* journaled by a previous transaction may end up still
* being dirty here, but we want to avoid writing back
* that buffer in the future after the "add to orphan"
* operation been committed, That's not only a performance
* gain, it also stops aliasing problems if the buffer is
* left behind for writeback and gets reallocated for another
* use in a different page. */
if (buffer_freed(bh) && !jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
/*
* A buffer which has been freed while still being journaled by
* a previous transaction.
*/
if (buffer_freed(bh)) {
/*
* If the running transaction is the one containing
* "add to orphan" operation (b_next_transaction !=
* NULL), we have to wait for that transaction to
* commit before we can really get rid of the buffer.
* So just clear b_modified to not confuse transaction
* credit accounting and refile the buffer to
* BJ_Forget of the running transaction. If the just
* committed transaction contains "add to orphan"
* operation, we can completely invalidate the buffer
 * now. We are rather thorough in that since the
 * buffer may still be accessible when blocksize <
* pagesize and it is attached to the last partial
* page.
*/
jh->b_modified = 0;
if (!jh->b_next_transaction) {
clear_buffer_freed(bh);
clear_buffer_jbddirty(bh);
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_req(bh);
bh->b_bdev = NULL;
}
}
if (buffer_jbddirty(bh)) {


@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal)
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
read_lock(&journal->j_state_lock);
/* Is it already empty? */
if (sb->s_start == 0) {
read_unlock(&journal->j_state_lock);
return;
}
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);


@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal)
if (!err)
err = err2;
/* Make sure all replayed data is on permanent storage */
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
if (journal->j_flags & JBD2_BARRIER) {
err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
if (!err)
err = err2;
}
return err;
}


@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
* We're outside-transaction here. Either or both of j_running_transaction
* and j_committing_transaction may be NULL.
*/
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
int partial_page)
{
transaction_t *transaction;
struct journal_head *jh;
int may_free = 1;
int ret;
BUFFER_TRACE(bh, "entry");
retry:
/*
* It is safe to proceed here without the j_list_lock because the
* buffers cannot be stolen by try_to_free_buffers as long as we are
@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* clear the buffer dirty bit at latest at the moment when the
* transaction marking the buffer as freed in the filesystem
* structures is committed because from that moment on the
* buffer can be reallocated and used by a different page.
* block can be reallocated and used by a different page.
 * Since the block hasn't been freed yet but the inode has
 * already been added to the orphan list, it is safe for us to add
 * the buffer to the BJ_Forget list of the newest transaction.
*
* Also we have to clear buffer_mapped flag of a truncated buffer
* because the buffer_head may be attached to the page straddling
* i_size (can happen only when blocksize < pagesize) and thus the
* buffer_head can be reused when the file is extended again. So we end
* up keeping around invalidated buffers attached to transactions'
* BJ_Forget list just to stop checkpointing code from cleaning up
* the transaction this buffer was modified in.
*/
transaction = jh->b_transaction;
if (transaction == NULL) {
@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* committed, the buffer won't be needed any
* longer. */
JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
ret = __dispose_buffer(jh,
may_free = __dispose_buffer(jh,
journal->j_running_transaction);
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);
return ret;
goto zap_buffer;
} else {
/* There is no currently-running transaction. So the
* orphan record which we wrote for this file must have
@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* the committing transaction, if it exists. */
if (journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "give to committing trans");
ret = __dispose_buffer(jh,
may_free = __dispose_buffer(jh,
journal->j_committing_transaction);
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);
return ret;
goto zap_buffer;
} else {
/* The orphan record's transaction has
* committed. We can cleanse this buffer */
@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
JBUFFER_TRACE(jh, "on committing transaction");
/*
* The buffer is committing, we simply cannot touch
* it. So we just set j_next_transaction to the
* running transaction (if there is one) and mark
* buffer as freed so that commit code knows it should
* clear dirty bits when it is done with the buffer.
* it. If the page is straddling i_size we have to wait
* for commit and try again.
*/
if (partial_page) {
tid_t tid = journal->j_committing_transaction->t_tid;
jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid);
goto retry;
}
/*
* OK, buffer won't be reachable after truncate. We just set
* j_next_transaction to the running transaction (if there is
* one) and mark buffer as freed so that commit code knows it
* should clear dirty bits when it is done with the buffer.
*/
set_buffer_freed(bh);
if (journal->j_running_transaction && buffer_jbddirty(bh))
@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
}
zap_buffer:
/*
* This is tricky. Although the buffer is truncated, it may be reused
* if blocksize < pagesize and it is attached to the page straddling
* EOF. Since the buffer might have been added to BJ_Forget list of the
* running transaction, journal_get_write_access() won't clear
* b_modified and credit accounting gets confused. So clear b_modified
* here.
*/
jh->b_modified = 0;
jbd2_journal_put_journal_head(jh);
zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock);
@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal,
if (offset <= curr_off) {
/* This block is wholly outside the truncation point */
lock_buffer(bh);
may_free &= journal_unmap_buffer(journal, bh);
may_free &= journal_unmap_buffer(journal, bh,
offset > 0);
unlock_buffer(bh);
}
curr_off = next_off;


@ -116,6 +116,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (unlikely(ret))
goto out;
file_update_time(vma->vm_file);
ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
if (ret) {
nilfs_transaction_abort(inode->i_sb);


@ -3,6 +3,7 @@
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */
#ifdef __KERNEL__


@ -26,19 +26,19 @@ TRACE_EVENT(ext4_free_inode,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( uid_t, uid )
__field( gid_t, gid )
__field( __u64, blocks )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = i_uid_read(inode);
__entry->gid = i_gid_read(inode);
__entry->blocks = inode->i_blocks;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
@ -300,10 +300,10 @@ TRACE_EVENT(ext4_da_writepages,
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
__field( pgoff_t, writeback_index )
__field( int, sync_mode )
__field( char, for_kupdate )
__field( char, range_cyclic )
__field( pgoff_t, writeback_index )
),
TP_fast_assign(
@ -313,14 +313,14 @@ TRACE_EVENT(ext4_da_writepages,
__entry->pages_skipped = wbc->pages_skipped;
__entry->range_start = wbc->range_start;
__entry->range_end = wbc->range_end;
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->sync_mode = wbc->sync_mode;
__entry->for_kupdate = wbc->for_kupdate;
__entry->range_cyclic = wbc->range_cyclic;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
"range_start %lld range_end %lld sync_mode %d"
"range_start %lld range_end %lld sync_mode %d "
"for_kupdate %d range_cyclic %d writeback_index %lu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->nr_to_write,
@ -382,8 +382,8 @@ TRACE_EVENT(ext4_da_writepages_result,
__field( int, ret )
__field( int, pages_written )
__field( long, pages_skipped )
__field( int, sync_mode )
__field( pgoff_t, writeback_index )
__field( int, sync_mode )
),
TP_fast_assign(
@ -392,8 +392,8 @@ TRACE_EVENT(ext4_da_writepages_result,
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
__entry->writeback_index = inode->i_mapping->writeback_index;
__entry->sync_mode = wbc->sync_mode;
),
TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld "
@ -411,16 +411,16 @@ DECLARE_EVENT_CLASS(ext4__page_op,
TP_ARGS(page),
TP_STRUCT__entry(
__field( pgoff_t, index )
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
__entry->index = page->index;
__entry->ino = page->mapping->host->i_ino;
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
),
TP_printk("dev %d,%d ino %lu page_index %lu",
@ -456,18 +456,18 @@ TRACE_EVENT(ext4_invalidatepage,
TP_ARGS(page, offset),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
__field( unsigned long, offset )
__field( ino_t, ino )
__field( dev_t, dev )
),
TP_fast_assign(
__entry->dev = page->mapping->host->i_sb->s_dev;
__entry->ino = page->mapping->host->i_ino;
__entry->index = page->index;
__entry->offset = offset;
__entry->ino = page->mapping->host->i_ino;
__entry->dev = page->mapping->host->i_sb->s_dev;
),
TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
@ -510,8 +510,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
__field( __u64, pa_lstart )
__field( __u32, pa_len )
),
@ -519,8 +519,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
__entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
__entry->pa_len = pa->pa_len;
),
TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
@ -645,7 +645,6 @@ TRACE_EVENT(ext4_request_blocks,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u32, logical )
__field( __u32, lleft )
@ -653,12 +652,12 @@ TRACE_EVENT(ext4_request_blocks,
__field( __u64, goal )
__field( __u64, pleft )
__field( __u64, pright )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
@ -666,6 +665,7 @@ TRACE_EVENT(ext4_request_blocks,
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
__entry->flags = ar->flags;
),
TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu "
@ -686,7 +686,6 @@ TRACE_EVENT(ext4_allocate_blocks,
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
__field( unsigned int, len )
__field( __u32, logical )
__field( __u32, lleft )
@ -694,13 +693,13 @@ TRACE_EVENT(ext4_allocate_blocks,
__field( __u64, goal )
__field( __u64, pleft )
__field( __u64, pright )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->logical = ar->logical;
__entry->goal = ar->goal;
@ -708,6 +707,7 @@ TRACE_EVENT(ext4_allocate_blocks,
__entry->lright = ar->lright;
__entry->pleft = ar->pleft;
__entry->pright = ar->pright;
__entry->flags = ar->flags;
),
TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u "
@ -728,19 +728,19 @@ TRACE_EVENT(ext4_free_blocks,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( __u64, block )
__field( unsigned long, count )
__field( int, flags )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
__entry->count = count;
__entry->flags = flags;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
@ -783,15 +783,15 @@ TRACE_EVENT(ext4_sync_file_exit,
TP_ARGS(inode, ret),
TP_STRUCT__entry(
__field( int, ret )
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, ret )
),
TP_fast_assign(
__entry->ret = ret;
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %lu ret %d",
@ -854,12 +854,6 @@ TRACE_EVENT(ext4_mballoc_alloc,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, found )
__field( __u16, groups )
__field( __u16, buddy )
__field( __u16, flags )
__field( __u16, tail )
__field( __u8, cr )
__field( __u32, orig_logical )
__field( int, orig_start )
__field( __u32, orig_group )
@ -872,17 +866,17 @@ TRACE_EVENT(ext4_mballoc_alloc,
__field( int, result_start )
__field( __u32, result_group )
__field( int, result_len )
__field( __u16, found )
__field( __u16, groups )
__field( __u16, buddy )
__field( __u16, flags )
__field( __u16, tail )
__field( __u8, cr )
),
TP_fast_assign(
__entry->dev = ac->ac_inode->i_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->found = ac->ac_found;
__entry->flags = ac->ac_flags;
__entry->groups = ac->ac_groups_scanned;
__entry->buddy = ac->ac_buddy;
__entry->tail = ac->ac_tail;
__entry->cr = ac->ac_criteria;
__entry->orig_logical = ac->ac_o_ex.fe_logical;
__entry->orig_start = ac->ac_o_ex.fe_start;
__entry->orig_group = ac->ac_o_ex.fe_group;
@ -895,6 +889,12 @@ TRACE_EVENT(ext4_mballoc_alloc,
__entry->result_start = ac->ac_f_ex.fe_start;
__entry->result_group = ac->ac_f_ex.fe_group;
__entry->result_len = ac->ac_f_ex.fe_len;
__entry->found = ac->ac_found;
__entry->flags = ac->ac_flags;
__entry->groups = ac->ac_groups_scanned;
__entry->buddy = ac->ac_buddy;
__entry->tail = ac->ac_tail;
__entry->cr = ac->ac_criteria;
),
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
@ -1015,17 +1015,17 @@ TRACE_EVENT(ext4_forget,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( int, is_metadata )
__field( __u64, block )
__field( int, is_metadata )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
__entry->block = block;
__entry->is_metadata = is_metadata;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
@ -1042,19 +1042,18 @@ TRACE_EVENT(ext4_da_update_reserve_space,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( __u64, i_blocks )
__field( int, used_blocks )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( int, allocated_meta_blocks )
__field( int, quota_claim )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->used_blocks = used_blocks;
__entry->reserved_data_blocks =
@ -1064,6 +1063,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
__entry->allocated_meta_blocks =
EXT4_I(inode)->i_allocated_meta_blocks;
__entry->quota_claim = quota_claim;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
@ -1085,21 +1085,21 @@ TRACE_EVENT(ext4_da_reserve_space,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( __u64, i_blocks )
__field( int, md_needed )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->md_needed = md_needed;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
@ -1119,23 +1119,23 @@ TRACE_EVENT(ext4_da_release_space,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, mode )
__field( __u64, i_blocks )
__field( int, freed_blocks )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( int, allocated_meta_blocks )
__field( __u16, mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->freed_blocks = freed_blocks;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
@ -1203,16 +1203,16 @@ TRACE_EVENT(ext4_direct_IO_enter,
TP_ARGS(inode, offset, len, rw),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
@ -1231,8 +1231,8 @@ TRACE_EVENT(ext4_direct_IO_exit,
TP_ARGS(inode, offset, len, rw, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned long, len )
__field( int, rw )
@ -1240,8 +1240,8 @@ TRACE_EVENT(ext4_direct_IO_exit,
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->len = len;
__entry->rw = rw;
@ -1261,16 +1261,16 @@ TRACE_EVENT(ext4_fallocate_enter,
TP_ARGS(inode, offset, len, mode),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( loff_t, len )
__field( int, mode )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->len = len;
__entry->mode = mode;
@ -1289,16 +1289,16 @@ TRACE_EVENT(ext4_fallocate_exit,
TP_ARGS(inode, offset, max_blocks, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, blocks )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = offset;
__entry->blocks = max_blocks;
__entry->ret = ret;
@ -1317,17 +1317,17 @@ TRACE_EVENT(ext4_unlink_enter,
TP_ARGS(parent, dentry),
TP_STRUCT__entry(
__field( ino_t, parent )
__field( ino_t, ino )
__field( loff_t, size )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, parent )
__field( loff_t, size )
),
TP_fast_assign(
__entry->parent = parent->i_ino;
__entry->ino = dentry->d_inode->i_ino;
__entry->size = dentry->d_inode->i_size;
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->parent = parent->i_ino;
__entry->size = dentry->d_inode->i_size;
),
TP_printk("dev %d,%d ino %lu size %lld parent %lu",
@ -1342,14 +1342,14 @@ TRACE_EVENT(ext4_unlink_exit,
TP_ARGS(dentry, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = dentry->d_inode->i_ino;
__entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->ret = ret;
),
@ -1365,14 +1365,14 @@ DECLARE_EVENT_CLASS(ext4__truncate,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, blocks )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->blocks = inode->i_blocks;
),
@ -1403,8 +1403,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
TP_ARGS(inode, map, ux),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, m_lblk )
__field( unsigned, m_len )
__field( ext4_lblk_t, u_lblk )
@ -1413,8 +1413,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->m_lblk = map->m_lblk;
__entry->m_len = map->m_len;
__entry->u_lblk = le32_to_cpu(ux->ee_block);
@ -1441,8 +1441,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
TP_ARGS(inode, map, ux, ix),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, m_lblk )
__field( unsigned, m_len )
__field( ext4_lblk_t, u_lblk )
@ -1454,8 +1454,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->m_lblk = map->m_lblk;
__entry->m_len = map->m_len;
__entry->u_lblk = le32_to_cpu(ux->ee_block);
@ -1483,16 +1483,16 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
TP_ARGS(inode, lblk, len, flags),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
__field( unsigned int, flags )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->len = len;
__entry->flags = flags;
@ -1525,19 +1525,19 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
TP_ARGS(inode, lblk, pblk, len, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ino_t, ino )
__field( ext4_fsblk_t, pblk )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->ino = inode->i_ino;
__entry->pblk = pblk;
__entry->lblk = lblk;
__entry->len = len;
__entry->ret = ret;
),
@ -1569,17 +1569,17 @@ TRACE_EVENT(ext4_ext_load_extent,
TP_ARGS(inode, lblk, pblk),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ino_t, ino )
__field( ext4_fsblk_t, pblk )
__field( ext4_lblk_t, lblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->ino = inode->i_ino;
__entry->pblk = pblk;
__entry->lblk = lblk;
),
TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
@ -1594,13 +1594,13 @@ TRACE_EVENT(ext4_load_inode,
TP_ARGS(inode),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %d,%d ino %ld",
@ -1615,14 +1615,14 @@ TRACE_EVENT(ext4_journal_start,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, nblocks )
__field(unsigned long, ip )
__field( int, nblocks )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->nblocks = nblocks;
__entry->ip = IP;
__entry->nblocks = nblocks;
),
TP_printk("dev %d,%d nblocks %d caller %pF",
@ -1686,23 +1686,23 @@ TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
TP_ARGS(inode, map, allocated, newblock),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( int, flags )
__field( ext4_lblk_t, lblk )
__field( ext4_fsblk_t, pblk )
__field( unsigned int, len )
__field( int, flags )
__field( unsigned int, allocated )
__field( ext4_fsblk_t, newblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->flags = map->m_flags;
__entry->lblk = map->m_lblk;
__entry->pblk = map->m_pblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
__entry->allocated = allocated;
__entry->newblk = newblock;
),
@ -1724,19 +1724,19 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
TP_STRUCT__entry(
__field( dev_t, dev )
__field( unsigned int, flags )
__field( ext4_lblk_t, lblk )
__field( ext4_fsblk_t, pblk )
__field( unsigned int, len )
__field( unsigned int, flags )
__field( int, ret )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->flags = map->m_flags;
__entry->lblk = map->m_lblk;
__entry->pblk = map->m_pblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
__entry->ret = ret;
),
@ -1753,16 +1753,16 @@ TRACE_EVENT(ext4_ext_put_in_cache,
TP_ARGS(inode, lblk, len, start),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
__field( ext4_fsblk_t, start )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->len = len;
__entry->start = start;
@ -1782,15 +1782,15 @@ TRACE_EVENT(ext4_ext_in_cache,
TP_ARGS(inode, lblk, ret),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( int, ret )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->ret = ret;
),
@ -1810,8 +1810,8 @@ TRACE_EVENT(ext4_find_delalloc_range,
TP_ARGS(inode, from, to, reverse, found, found_blk),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, from )
__field( ext4_lblk_t, to )
__field( int, reverse )
@ -1820,8 +1820,8 @@ TRACE_EVENT(ext4_find_delalloc_range,
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->from = from;
__entry->to = to;
__entry->reverse = reverse;
@ -1844,15 +1844,15 @@ TRACE_EVENT(ext4_get_reserved_cluster_alloc,
TP_ARGS(inode, lblk, len),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, lblk )
__field( unsigned int, len )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = lblk;
__entry->len = len;
),
@ -1871,18 +1871,18 @@ TRACE_EVENT(ext4_ext_show_extent,
TP_ARGS(inode, lblk, pblk, len),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, lblk )
__field( ino_t, ino )
__field( ext4_fsblk_t, pblk )
__field( ext4_lblk_t, lblk )
__field( unsigned short, len )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->lblk = lblk;
__entry->ino = inode->i_ino;
__entry->pblk = pblk;
__entry->lblk = lblk;
__entry->len = len;
),
@ -1902,25 +1902,25 @@ TRACE_EVENT(ext4_remove_blocks,
TP_ARGS(inode, ex, from, to, partial_cluster),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk )
__field( unsigned short, ee_len )
__field( ino_t, ino )
__field( ext4_lblk_t, from )
__field( ext4_lblk_t, to )
__field( ext4_fsblk_t, partial )
__field( ext4_fsblk_t, ee_pblk )
__field( ext4_lblk_t, ee_lblk )
__field( unsigned short, ee_len )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ee_lblk = cpu_to_le32(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->ino = inode->i_ino;
__entry->from = from;
__entry->to = to;
__entry->partial = partial_cluster;
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_lblk = cpu_to_le32(ex->ee_block);
__entry->ee_len = ext4_ext_get_actual_len(ex);
),
TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
@ -1942,23 +1942,23 @@ TRACE_EVENT(ext4_ext_rm_leaf,
TP_ARGS(inode, start, ex, partial_cluster),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_fsblk_t, partial )
__field( ext4_lblk_t, start )
__field( ext4_lblk_t, ee_lblk )
__field( ext4_fsblk_t, ee_pblk )
__field( short, ee_len )
__field( ext4_fsblk_t, partial )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->partial = partial_cluster;
__entry->start = start;
__entry->ee_lblk = le32_to_cpu(ex->ee_block);
__entry->ee_pblk = ext4_ext_pblock(ex);
__entry->ee_len = ext4_ext_get_actual_len(ex);
__entry->partial = partial_cluster;
),
TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
@ -1978,14 +1978,14 @@ TRACE_EVENT(ext4_ext_rm_idx,
TP_ARGS(inode, pblk),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_fsblk_t, pblk )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pblk = pblk;
),
@ -2001,15 +2001,15 @@ TRACE_EVENT(ext4_ext_remove_space,
TP_ARGS(inode, start, depth),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, start )
__field( int, depth )
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->start = start;
__entry->depth = depth;
),
@ -2028,8 +2028,8 @@ TRACE_EVENT(ext4_ext_remove_space_done,
TP_ARGS(inode, start, depth, partial, eh_entries),
TP_STRUCT__entry(
__field( ino_t, ino )
__field( dev_t, dev )
__field( ino_t, ino )
__field( ext4_lblk_t, start )
__field( int, depth )
__field( ext4_lblk_t, partial )
@ -2037,8 +2037,8 @@ TRACE_EVENT(ext4_ext_remove_space_done,
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->start = start;
__entry->depth = depth;
__entry->partial = partial;