Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (36 commits)
  ext4: fix up rb_root initializations to use RB_ROOT
  ext4: Code cleanup for EXT4_IOC_MOVE_EXT ioctl
  ext4: Fix the NULL reference in double_down_write_data_sem()
  ext4: Fix insertion point of extent in mext_insert_across_blocks()
  ext4: consolidate in_range() definitions
  ext4: cleanup to use ext4_grp_offs_to_block()
  ext4: cleanup to use ext4_group_first_block_no()
  ext4: Release page references acquired in ext4_da_block_invalidatepages
  ext4: Fix ext4_quota_write cross block boundary behaviour
  ext4: Convert BUG_ON checks to use ext4_error() instead
  ext4: Use direct_IO_no_locking in ext4 dio read
  ext4: use ext4_get_block_write in buffer write
  ext4: mechanical rename some of the direct I/O get_block's identifiers
  ext4: make "offset" consistent in ext4_check_dir_entry()
  ext4: Handle non empty on-disk orphan link
  ext4: explicitly remove inode from orphan list after failed direct io
  ext4: fix error handling in migrate
  ext4: deprecate obsoleted mount options
  ext4: Fix fencepost error in choosing group vs file preallocation.
  jbd2: clean up an assertion in jbd2_journal_commit_transaction()
  ...
Linus Torvalds 2010-03-05 10:47:00 -08:00
commit 1f63b9c15b
27 changed files with 1348 additions and 661 deletions
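
A pattern repeated across this merge is the error-reporting cleanup: ext4_error() and ext4_warning() no longer take the calling function's name as an explicit argument; they are now macros that splice __func__ into a call to __ext4_error()/__ext4_warning() (see the ext4.h hunks below). A minimal user-space sketch of the mechanism — the printf() body is a stand-in; the real __ext4_error() also updates the superblock and honors the errors= mount behavior:

	#include <stdarg.h>
	#include <stdio.h>

	struct super_block;	/* opaque stand-in for the kernel type */

	/* call sites never reach this directly; the macro below adds __func__ */
	static void __ext4_error(struct super_block *sb, const char *function,
				 const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		printf("EXT4-fs error: %s: ", function);
		vprintf(fmt, args);
		putchar('\n');
		va_end(args);
	}

	/* GNU named-variadic-macro form, as in the kernel header */
	#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message)

This is why dozens of hunks below simply drop a __func__ (or hard-coded function-name string) argument from ext4_error()/ext4_warning() calls.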

View File

@@ -97,8 +97,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 	/* If checksum is bad mark all blocks used to prevent allocation
	 * essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-		ext4_error(sb, __func__,
-			   "Checksum bad for group %u", block_group);
+		ext4_error(sb, "Checksum bad for group %u",
+			   block_group);
		ext4_free_blks_set(sb, gdp, 0);
		ext4_free_inodes_set(sb, gdp, 0);
		ext4_itable_unused_set(sb, gdp, 0);
@@ -130,8 +130,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
		 * to make sure we calculate the right free blocks
		 */
		group_blocks = ext4_blocks_count(sbi->s_es) -
-			le32_to_cpu(sbi->s_es->s_first_data_block) -
-			(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
+			ext4_group_first_block_no(sb, ngroups - 1);
	} else {
		group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
	}
@@ -189,9 +188,6 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
  * when a file system is mounted (see ext4_fill_super).
  */
 
-#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
-
-
 /**
  * ext4_get_group_desc() -- load group descriptor from disk
  * @sb:			super block
@@ -210,10 +206,8 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
	if (block_group >= ngroups) {
-		ext4_error(sb, "ext4_get_group_desc",
-			   "block_group >= groups_count - "
-			   "block_group = %u, groups_count = %u",
-			   block_group, ngroups);
+		ext4_error(sb, "block_group >= groups_count - block_group = %u,"
+			   " groups_count = %u", block_group, ngroups);
 
		return NULL;
	}
@@ -221,8 +215,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
	group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
	offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
	if (!sbi->s_group_desc[group_desc]) {
-		ext4_error(sb, "ext4_get_group_desc",
-			   "Group descriptor not loaded - "
+		ext4_error(sb, "Group descriptor not loaded - "
			   "block_group = %u, group_desc = %u, desc = %u",
			   block_group, group_desc, offset);
		return NULL;
@@ -282,9 +275,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
	return 1;
 
 err_out:
-	ext4_error(sb, __func__,
-			"Invalid block bitmap - "
-			"block_group = %d, block = %llu",
+	ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
			block_group, bitmap_blk);
	return 0;
 }
@@ -311,8 +302,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
	bitmap_blk = ext4_block_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
-		ext4_error(sb, __func__,
-			    "Cannot read block bitmap - "
+		ext4_error(sb, "Cannot read block bitmap - "
			    "block_group = %u, block_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
@@ -354,8 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
	set_bitmap_uptodate(bh);
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
-		ext4_error(sb, __func__,
-			    "Cannot read block bitmap - "
+		ext4_error(sb, "Cannot read block bitmap - "
			    "block_group = %u, block_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
@@ -419,8 +408,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
	    in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
	    in_range(block + count - 1, ext4_inode_table(sb, desc),
		     sbi->s_itb_per_group)) {
-		ext4_error(sb, __func__,
-			   "Adding blocks in system zones - "
+		ext4_error(sb, "Adding blocks in system zones - "
			   "Block = %llu, count = %lu",
			   block, count);
		goto error_return;
@@ -453,8 +441,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
		BUFFER_TRACE(bitmap_bh, "clear bit");
		if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
					   bit + i, bitmap_bh->b_data)) {
-			ext4_error(sb, __func__,
-				   "bit already cleared for block %llu",
+			ext4_error(sb, "bit already cleared for block %llu",
				   (ext4_fsblk_t)(block + i));
			BUFFER_TRACE(bitmap_bh, "bit already cleared");
		} else {
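
The in_range() definition deleted above is re-added once, for every user, at the bottom of ext4.h (last hunk of that file below): it treats len as a block count, so the closed interval tested is [first, first + len - 1]. A tiny self-contained check, for illustration only:

	#include <assert.h>

	/* the exact definition being consolidated into ext4.h */
	#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)

	int main(void)
	{
		/* a 4-block extent starting at block 100 covers 100..103 */
		assert(in_range(100, 100, 4));	/* first block: inside */
		assert(in_range(103, 100, 4));	/* last block: inside */
		assert(!in_range(104, 100, 4));	/* one past the end: outside */
		return 0;
	}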

View File

@@ -205,14 +205,14 @@ void ext4_release_system_zone(struct super_block *sb)
		entry = rb_entry(n, struct ext4_system_zone, node);
		kmem_cache_free(ext4_system_zone_cachep, entry);
		if (!parent)
-			EXT4_SB(sb)->system_blks.rb_node = NULL;
+			EXT4_SB(sb)->system_blks = RB_ROOT;
		else if (parent->rb_left == n)
			parent->rb_left = NULL;
		else if (parent->rb_right == n)
			parent->rb_right = NULL;
		n = parent;
	}
-	EXT4_SB(sb)->system_blks.rb_node = NULL;
+	EXT4_SB(sb)->system_blks = RB_ROOT;
 }
 
 /*
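
This hunk (and the matching one in dir.c below) replaces open-coded rb_node = NULL stores with the RB_ROOT initializer, the documented way to reset an rbtree to empty. Today the two are equivalent, as this sketch paraphrased from include/linux/rbtree.h shows, but the initializer stays correct even if struct rb_root ever grows another member:

	struct rb_node;

	struct rb_root {
		struct rb_node *rb_node;
	};

	#define RB_ROOT	(struct rb_root) { NULL, }

	/* after freeing every node, reset the whole root, not one field: */
	static void reset_tree(struct rb_root *root)
	{
		*root = RB_ROOT;	/* preferred over root->rb_node = NULL */
	}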

View File

@@ -83,10 +83,12 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
		error_msg = "inode out of bounds";
 
	if (error_msg != NULL)
-		ext4_error(dir->i_sb, function,
-			"bad entry in directory #%lu: %s - "
-			"offset=%u, inode=%u, rec_len=%d, name_len=%d",
-			dir->i_ino, error_msg, offset,
+		__ext4_error(dir->i_sb, function,
+			"bad entry in directory #%lu: %s - block=%llu"
+			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
+			dir->i_ino, error_msg,
+			(unsigned long long) bh->b_blocknr,
+			(unsigned) (offset%bh->b_size), offset,
			le32_to_cpu(de->inode),
			rlen, de->name_len);
	return error_msg == NULL ? 1 : 0;
@@ -150,7 +152,7 @@ static int ext4_readdir(struct file *filp,
		 */
		if (!bh) {
			if (!dir_has_error) {
-				ext4_error(sb, __func__, "directory #%lu "
+				ext4_error(sb, "directory #%lu "
					   "contains a hole at offset %Lu",
					   inode->i_ino,
					   (unsigned long long) filp->f_pos);
@@ -303,7 +305,7 @@ static void free_rb_tree_fname(struct rb_root *root)
			kfree(old);
		}
		if (!parent)
-			root->rb_node = NULL;
+			*root = RB_ROOT;
		else if (parent->rb_left == n)
			parent->rb_left = NULL;
		else if (parent->rb_right == n)

View File

@@ -53,6 +53,12 @@
 #define ext4_debug(f, a...)	do {} while (0)
 #endif
 
+#define EXT4_ERROR_INODE(inode, fmt, a...) \
+	ext4_error_inode(__func__, (inode), (fmt), ## a);
+
+#define EXT4_ERROR_FILE(file, fmt, a...)	\
+	ext4_error_file(__func__, (file), (fmt), ## a);
+
 /* data type for block offset of block group */
 typedef int ext4_grpblk_t;
@@ -133,14 +139,14 @@ struct mpage_da_data {
	int pages_written;
	int retval;
 };
 
-#define	DIO_AIO_UNWRITTEN	0x1
+#define	EXT4_IO_UNWRITTEN	0x1
 typedef struct ext4_io_end {
	struct list_head	list;		/* per-file finished AIO list */
	struct inode		*inode;		/* file being written to */
	unsigned int		flag;		/* unwritten or not */
-	int			error;		/* I/O error code */
-	ext4_lblk_t		offset;		/* offset in the file */
-	size_t			size;		/* size of the extent */
+	struct page		*page;		/* page struct for buffer write */
+	loff_t			offset;		/* offset in the file */
+	ssize_t			size;		/* size of the extent */
	struct work_struct	work;		/* data work queue */
 } ext4_io_end_t;
@@ -284,10 +290,12 @@ struct flex_groups {
 #define EXT4_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define EXT4_HUGE_FILE_FL		0x00040000 /* Set to each huge file */
 #define EXT4_EXTENTS_FL			0x00080000 /* Inode uses extents */
+#define EXT4_EA_INODE_FL		0x00200000 /* Inode used for large EA */
+#define EXT4_EOFBLOCKS_FL		0x00400000 /* Blocks allocated beyond EOF */
 #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */
 
-#define EXT4_FL_USER_VISIBLE		0x000BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE	0x000B80FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE		0x004BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE	0x004B80FF /* User modifiable flags */
 
 /* Flags that should be inherited by new inodes from their parent. */
 #define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
@@ -313,17 +321,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
		return flags & EXT4_OTHER_FLMASK;
 }
 
-/*
- * Inode dynamic state flags
- */
-#define EXT4_STATE_JDATA		0x00000001 /* journaled data exists */
-#define EXT4_STATE_NEW			0x00000002 /* inode is newly created */
-#define EXT4_STATE_XATTR		0x00000004 /* has in-inode xattrs */
-#define EXT4_STATE_NO_EXPAND		0x00000008 /* No space for expansion */
-#define EXT4_STATE_DA_ALLOC_CLOSE	0x00000010 /* Alloc DA blks on close */
-#define EXT4_STATE_EXT_MIGRATE		0x00000020 /* Inode is migrating */
-#define EXT4_STATE_DIO_UNWRITTEN	0x00000040 /* need convert on dio done*/
-
 /* Used to pass group descriptor data when online resize is done */
 struct ext4_new_group_input {
	__u32 group;		/* Group number for this data */
@@ -364,19 +361,20 @@ struct ext4_new_group_data {
	/* caller is from the direct IO path, request to creation of an
	unitialized extents if not allocated, split the uninitialized
	extent if blocks has been preallocated already*/
-#define EXT4_GET_BLOCKS_DIO			0x0008
+#define EXT4_GET_BLOCKS_PRE_IO			0x0008
 #define EXT4_GET_BLOCKS_CONVERT			0x0010
-#define EXT4_GET_BLOCKS_DIO_CREATE_EXT		(EXT4_GET_BLOCKS_DIO|\
+#define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
+					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+	/* Convert extent to initialized after IO complete */
+#define EXT4_GET_BLOCKS_IO_CONVERT_EXT	(EXT4_GET_BLOCKS_CONVERT|\
					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
-	/* Convert extent to initialized after direct IO complete */
-#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT	(EXT4_GET_BLOCKS_CONVERT|\
-					 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
 
 /*
  * Flags used by ext4_free_blocks
  */
 #define EXT4_FREE_BLOCKS_METADATA	0x0001
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
+#define EXT4_FREE_BLOCKS_VALIDATED	0x0004
 
 /*
  * ioctl commands
@@ -630,7 +628,7 @@ struct ext4_inode_info {
	 * near to their parent directory's inode.
	 */
	ext4_group_t	i_block_group;
-	__u32	i_state;		/* Dynamic state flags for ext4 */
+	unsigned long	i_state_flags;		/* Dynamic state flags */
	ext4_lblk_t		i_dir_start_lookup;
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -708,8 +706,9 @@ struct ext4_inode_info {
	qsize_t i_reserved_quota;
 #endif
 
-	/* completed async DIOs that might need unwritten extents handling */
-	struct list_head i_aio_dio_complete_list;
+	/* completed IOs that might need unwritten extents handling */
+	struct list_head i_completed_io_list;
+	spinlock_t i_completed_io_lock;
	/* current io_end structure for async DIO write*/
	ext4_io_end_t *cur_aio_dio;
@@ -760,6 +759,7 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_QUOTA		0x80000	/* Some quota option set */
 #define EXT4_MOUNT_USRQUOTA		0x100000 /* "old" user quota */
 #define EXT4_MOUNT_GRPQUOTA		0x200000 /* "old" group quota */
+#define EXT4_MOUNT_DIOREAD_NOLOCK	0x400000 /* Enable support for dio read nolocking */
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
@@ -1050,6 +1050,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
		(ino >= EXT4_FIRST_INO(sb) &&
		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
+
+/*
+ * Inode dynamic state flags
+ */
+enum {
+	EXT4_STATE_JDATA,		/* journaled data exists */
+	EXT4_STATE_NEW,			/* inode is newly created */
+	EXT4_STATE_XATTR,		/* has in-inode xattrs */
+	EXT4_STATE_NO_EXPAND,		/* No space for expansion */
+	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
+	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
+	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
+};
+
+static inline int ext4_test_inode_state(struct inode *inode, int bit)
+{
+	return test_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
+
+static inline void ext4_set_inode_state(struct inode *inode, int bit)
+{
+	set_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
+
+static inline void ext4_clear_inode_state(struct inode *inode, int bit)
+{
+	clear_bit(bit, &EXT4_I(inode)->i_state_flags);
+}
 #else
 /* Assume that user mode programs are passing in an ext4fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
@@ -1126,6 +1154,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 #define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
 #define EXT4_FEATURE_INCOMPAT_MMP               0x0100
 #define EXT4_FEATURE_INCOMPAT_FLEX_BG		0x0200
+#define EXT4_FEATURE_INCOMPAT_EA_INODE		0x0400 /* EA in inode */
+#define EXT4_FEATURE_INCOMPAT_DIRDATA		0x1000 /* data in dirent */
 
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1439,7 +1469,7 @@ extern int ext4_block_truncate_page(handle_t *handle,
		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern int flush_completed_IO(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
					int used, int quota_claim);
 /* ioctl.c */
@@ -1465,13 +1495,20 @@ extern int ext4_group_extend(struct super_block *sb,
				ext4_fsblk_t n_blocks_count);
 
 /* super.c */
-extern void ext4_error(struct super_block *, const char *, const char *, ...)
+extern void __ext4_error(struct super_block *, const char *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
+#define ext4_error(sb, message...)	__ext4_error(sb, __func__, ## message)
+extern void ext4_error_inode(const char *, struct inode *, const char *, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern void ext4_error_file(const char *, struct file *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
 extern void __ext4_std_error(struct super_block *, const char *, int);
 extern void ext4_abort(struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
-extern void ext4_warning(struct super_block *, const char *, const char *, ...)
+extern void __ext4_warning(struct super_block *, const char *,
+			  const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
+#define ext4_warning(sb, message...)	__ext4_warning(sb, __func__, ## message)
 extern void ext4_msg(struct super_block *, const char *, const char *, ...)
	__attribute__ ((format (printf, 3, 4)));
 extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
@@ -1744,7 +1781,7 @@ extern void ext4_ext_release(struct super_block *);
 extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
			  loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-			  loff_t len);
+			  ssize_t len);
 extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
			   sector_t block, unsigned int max_blocks,
			   struct buffer_head *bh, int flags);
@@ -1756,6 +1793,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
			     __u64 len, __u64 *moved_len);
 
+/* BH_Uninit flag: blocks are allocated but uninitialized on disk */
+enum ext4_state_bits {
+	BH_Uninit	/* blocks are allocated but uninitialized on disk */
+	 = BH_JBDPrivateStart,
+};
+
+BUFFER_FNS(Uninit, uninit)
+TAS_BUFFER_FNS(Uninit, uninit)
+
 /*
  * Add new method to test wether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
@@ -1773,6 +1819,8 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }
 
+#define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */
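
With this header change, the dynamic inode state moves from a __u32 bitmask (i_state, manipulated with |=, &=, &) to bit numbers in an unsigned long (i_state_flags) accessed through test_bit()/set_bit()/clear_bit(), which are atomic bitops. The before/after shapes at a call site, assembled from hunks elsewhere in this merge:

	/* before: plain read-modify-write on a shared __u32 */
	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR)
		/* ... */;
	EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;

	/* after: atomic bitops keyed by enum bit number */
	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		/* ... */;
	ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);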

View File

@@ -125,14 +125,14 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
		ext4_journal_abort_handle(where, __func__, bh,
					  handle, err);
	} else {
-		if (inode && bh)
+		if (inode)
			mark_buffer_dirty_inode(bh, inode);
		else
			mark_buffer_dirty(bh);
		if (inode && inode_needs_sync(inode)) {
			sync_dirty_buffer(bh);
			if (buffer_req(bh) && !buffer_uptodate(bh)) {
-				ext4_error(inode->i_sb, __func__,
+				ext4_error(inode->i_sb,
					   "IO error syncing inode, "
					   "inode=%lu, block=%llu",
					   inode->i_ino,

View File

@@ -304,4 +304,28 @@ static inline int ext4_should_writeback_data(struct inode *inode)
	return 0;
 }
 
+/*
+ * This function controls whether or not we should try to go down the
+ * dioread_nolock code paths, which makes it safe to avoid taking
+ * i_mutex for direct I/O reads.  This only works for extent-based
+ * files, and it doesn't work for nobh or if data journaling is
+ * enabled, since the dioread_nolock code uses b_private to pass
+ * information back to the I/O completion handler, and this conflicts
+ * with the jbd's use of b_private.
+ */
+static inline int ext4_should_dioread_nolock(struct inode *inode)
+{
+	if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
+		return 0;
+	if (test_opt(inode->i_sb, NOBH))
+		return 0;
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+	if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+		return 0;
+	if (ext4_should_journal_data(inode))
+		return 0;
+	return 1;
+}
+
 #endif	/* _EXT4_JBD2_H */

View File

@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
		if (S_ISREG(inode->i_mode))
			block_group++;
	}
-	bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
-		le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
 
	/*
@@ -440,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
	return 0;
 
 corrupted:
-	ext4_error(inode->i_sb, function,
+	__ext4_error(inode->i_sb, function,
			"bad header/extent in inode #%lu: %s - magic %x, "
			"entries %u, max %u(%u), depth %u(%u)",
			inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -703,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
		}
		eh = ext_block_hdr(bh);
		ppos++;
-		BUG_ON(ppos > depth);
+		if (unlikely(ppos > depth)) {
+			put_bh(bh);
+			EXT4_ERROR_INODE(inode,
+					 "ppos %d > depth %d", ppos, depth);
+			goto err;
+		}
		path[ppos].p_bh = bh;
		path[ppos].p_hdr = eh;
		i--;
@@ -749,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
	if (err)
		return err;
 
-	BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+	if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
		/* insert after */
@@ -779,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
	ext4_idx_store_pblock(ix, ptr);
	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
-	BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
-			     > le16_to_cpu(curp->p_hdr->eh_max));
-	BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+			     > le16_to_cpu(curp->p_hdr->eh_max))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d == ei_block %d!",
+				 logical, le32_to_cpu(curp->p_idx->ei_block));
+		return -EIO;
+	}
+	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
+		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
+		return -EIO;
+	}
 
	err = ext4_ext_dirty(handle, inode, curp);
	ext4_std_error(inode->i_sb, err);
@@ -819,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
	/* if current leaf will be split, then we should use
	 * border from split point */
-	BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+	if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
+		EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
+		return -EIO;
+	}
	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
		border = path[depth].p_ext[1].ee_block;
		ext_debug("leaf will be split."
@@ -860,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
	/* initialize new leaf */
	newblock = ablocks[--a];
-	BUG_ON(newblock == 0);
+	if (unlikely(newblock == 0)) {
+		EXT4_ERROR_INODE(inode, "newblock == 0!");
+		err = -EIO;
+		goto cleanup;
+	}
	bh = sb_getblk(inode->i_sb, newblock);
	if (!bh) {
		err = -EIO;
@@ -880,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
	ex = EXT_FIRST_EXTENT(neh);
 
	/* move remainder of path[depth] to the new leaf */
-	BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+	if (unlikely(path[depth].p_hdr->eh_entries !=
+		     path[depth].p_hdr->eh_max)) {
+		EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
+				 path[depth].p_hdr->eh_entries,
+				 path[depth].p_hdr->eh_max);
+		err = -EIO;
+		goto cleanup;
+	}
	/* start copy from next extent */
	/* TODO: we could do it by single memmove */
	m = 0;
@@ -927,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 
	/* create intermediate indexes */
	k = depth - at - 1;
-	BUG_ON(k < 0);
+	if (unlikely(k < 0)) {
+		EXT4_ERROR_INODE(inode, "k %d < 0!", k);
+		err = -EIO;
+		goto cleanup;
+	}
	if (k)
		ext_debug("create %d intermediate indices\n", k);
	/* insert new index into current index block */
@@ -964,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
		ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
				EXT_MAX_INDEX(path[i].p_hdr));
-		BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
-				EXT_LAST_INDEX(path[i].p_hdr));
+		if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
+					EXT_LAST_INDEX(path[i].p_hdr))) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+					 le32_to_cpu(path[i].p_ext->ee_block));
+			err = -EIO;
+			goto cleanup;
+		}
		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
			ext_debug("%d: move %d:%llu in new index %llu\n", i,
					le32_to_cpu(path[i].p_idx->ei_block),
@@ -1203,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
	struct ext4_extent *ex;
	int depth, ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
	depth = path->p_depth;
	*phys = 0;
@@ -1217,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
+					 *logical, le32_to_cpu(ex->ee_block));
+			return -EIO;
+		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+				  "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
+				  ix != NULL ? ix->ei_block : 0,
+				  EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
+					EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+				  depth);
+				return -EIO;
+			}
		}
		return 0;
	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
	*phys = ext_pblock(ex) + ee_len - 1;
@@ -1251,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
	int depth;	/* Note, NOT eh_depth; depth from top of tree */
	int ee_len;
 
-	BUG_ON(path == NULL);
+	if (unlikely(path == NULL)) {
+		EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+		return -EIO;
+	}
	depth = path->p_depth;
	*phys = 0;
@@ -1265,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
-		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+		if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+			EXT4_ERROR_INODE(inode,
+					 "first_extent(path[%d].p_hdr) != ex",
+					 depth);
+			return -EIO;
+		}
		while (--depth >= 0) {
			ix = path[depth].p_idx;
-			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+			if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+				EXT4_ERROR_INODE(inode,
+						 "ix != EXT_FIRST_INDEX *logical %d!",
+						 *logical);
+				return -EIO;
+			}
		}
		*logical = le32_to_cpu(ex->ee_block);
		*phys = ext_pblock(ex);
		return 0;
	}
 
-	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+	if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+		EXT4_ERROR_INODE(inode,
+				 "logical %d < ee_block %d + ee_len %d!",
+				 *logical, le32_to_cpu(ex->ee_block), ee_len);
+		return -EIO;
+	}
 
	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
@@ -1414,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
	eh = path[depth].p_hdr;
	ex = path[depth].p_ext;
-	BUG_ON(ex == NULL);
-	BUG_ON(eh == NULL);
+
+	if (unlikely(ex == NULL || eh == NULL)) {
+		EXT4_ERROR_INODE(inode,
+				 "ex %p == NULL or eh %p == NULL", ex, eh);
+		return -EIO;
+	}
 
	if (depth == 0) {
		/* there is no tree at all */
@@ -1538,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
		merge_done = 1;
		WARN_ON(eh->eh_entries == 0);
		if (!eh->eh_entries)
-			ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
-			   "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+			ext4_error(inode->i_sb,
+				   "inode#%lu, eh->eh_entries = 0!",
+				   inode->i_ino);
	}
 
	return merge_done;
@@ -1612,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
	ext4_lblk_t next;
	unsigned uninitialized = 0;
 
-	BUG_ON(ext4_ext_get_actual_len(newext) == 0);
+	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+		return -EIO;
+	}
	depth = ext_depth(inode);
	ex = path[depth].p_ext;
-	BUG_ON(path[depth].p_hdr == NULL);
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
 
	/* try to insert block into found extent and return */
-	if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
		&& ext4_can_extents_be_merged(inode, ex, newext)) {
		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
			  ext4_ext_is_uninitialized(newext),
@@ -1739,7 +1830,7 @@ has_space:
 
 merge:
	/* try to merge extents to the right */
-	if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
		ext4_ext_try_to_merge(inode, path, nearex);
 
	/* try to merge extents to the left */
@@ -1787,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
		}
 
		depth = ext_depth(inode);
-		BUG_ON(path[depth].p_hdr == NULL);
+		if (unlikely(path[depth].p_hdr == NULL)) {
+			EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+			err = -EIO;
+			break;
+		}
		ex = path[depth].p_ext;
		next = ext4_ext_next_allocated_block(path);
@@ -1838,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
		}
 
-		BUG_ON(cbex.ec_len == 0);
+		if (unlikely(cbex.ec_len == 0)) {
+			EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+			err = -EIO;
+			break;
+		}
		err = func(inode, path, &cbex, ex, cbdata);
		ext4_ext_drop_refs(path);
@@ -1952,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
	BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
			cex->ec_type != EXT4_EXT_CACHE_EXTENT);
-	if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+	if (in_range(block, cex->ec_block, cex->ec_len)) {
		ex->ee_block = cpu_to_le32(cex->ec_block);
		ext4_ext_store_pblock(ex, cex->ec_start);
		ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1981,7 +2080,10 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
	/* free index block */
	path--;
	leaf = idx_pblock(path->p_idx);
-	BUG_ON(path->p_hdr->eh_entries == 0);
+	if (unlikely(path->p_hdr->eh_entries == 0)) {
+		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+		return -EIO;
+	}
	err = ext4_ext_get_access(handle, inode, path);
	if (err)
		return err;
@@ -2119,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
	if (!path[depth].p_hdr)
		path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
	eh = path[depth].p_hdr;
-	BUG_ON(eh == NULL);
-
+	if (unlikely(path[depth].p_hdr == NULL)) {
+		EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+		return -EIO;
+	}
	/* find where to start removing */
	ex = EXT_LAST_EXTENT(eh);
@@ -2983,7 +3087,7 @@ fix_extent_len:
	ext4_ext_dirty(handle, inode, path + depth);
	return err;
 }
 
-static int ext4_convert_unwritten_extents_dio(handle_t *handle,
+static int ext4_convert_unwritten_extents_endio(handle_t *handle,
					      struct inode *inode,
					      struct ext4_ext_path *path)
 {
@@ -3063,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
		  flags, allocated);
	ext4_ext_show_leaf(inode, path);
 
-	/* DIO get_block() before submit the IO, split the extent */
-	if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+	/* get_block() before submit the IO, split the extent */
+	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
		ret = ext4_split_unwritten_extents(handle,
						inode, path, iblock,
						max_blocks, flags);
@@ -3074,14 +3178,16 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
		 * completed
		 */
		if (io)
-			io->flag = DIO_AIO_UNWRITTEN;
+			io->flag = EXT4_IO_UNWRITTEN;
		else
-			EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
+			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
		goto out;
	}
-	/* async DIO end_io complete, convert the filled extent to written */
-	if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
-		ret = ext4_convert_unwritten_extents_dio(handle, inode,
+	/* IO end_io complete, convert the filled extent to written */
+	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+		ret = ext4_convert_unwritten_extents_endio(handle, inode,
							path);
		if (ret >= 0)
			ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -3185,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
 {
	struct ext4_ext_path *path = NULL;
	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex;
+	struct ext4_extent newex, *ex, *last_ex;
	ext4_fsblk_t newblock;
	int err = 0, depth, ret, cache_type;
	unsigned int allocated = 0;
@@ -3237,10 +3343,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	 * this situation is possible, though, _during_ tree modification;
	 * this is why assert can't be put in ext4_ext_find_extent()
	 */
-	if (path[depth].p_ext == NULL && depth != 0) {
-		ext4_error(inode->i_sb, __func__, "bad extent address "
-			   "inode: %lu, iblock: %d, depth: %d",
-			   inode->i_ino, iblock, depth);
+	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+		EXT4_ERROR_INODE(inode, "bad extent address "
+				 "iblock: %d, depth: %d pblock %lld",
				 iblock, depth, path[depth].p_block);
		err = -EIO;
		goto out2;
	}
@@ -3258,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
		 */
		ee_len = ext4_ext_get_actual_len(ex);
		/* if found extent covers block, simply return it */
-		if (iblock >= ee_block && iblock < ee_block + ee_len) {
+		if (in_range(iblock, ee_block, ee_len)) {
			newblock = iblock - ee_block + ee_start;
			/* number of remaining blocks in the extent */
			allocated = ee_len - (iblock - ee_block);
@@ -3350,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
	if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
		ext4_ext_mark_uninitialized(&newex);
		/*
-		 * io_end structure was created for every async
-		 * direct IO write to the middle of the file.
-		 * To avoid unecessary convertion for every aio dio rewrite
-		 * to the mid of file, here we flag the IO that is really
-		 * need the convertion.
+		 * io_end structure was created for every IO write to an
+		 * uninitialized extent. To avoid unecessary conversion,
+		 * here we flag the IO that really needs the conversion.
		 * For non asycn direct IO case, flag the inode state
		 * that we need to perform convertion when IO is done.
		 */
-		if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
			if (io)
-				io->flag = DIO_AIO_UNWRITTEN;
+				io->flag = EXT4_IO_UNWRITTEN;
			else
-				EXT4_I(inode)->i_state |=
-					EXT4_STATE_DIO_UNWRITTEN;;
+				ext4_set_inode_state(inode,
						     EXT4_STATE_DIO_UNWRITTEN);
		}
+		if (ext4_should_dioread_nolock(inode))
+			set_buffer_uninit(bh_result);
+	}
+
+	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+		if (unlikely(!eh->eh_entries)) {
+			EXT4_ERROR_INODE(inode,
+					 "eh->eh_entries == 0 ee_block %d",
+					 ex->ee_block);
+			err = -EIO;
+			goto out2;
+		}
+		last_ex = EXT_LAST_EXTENT(eh);
+		if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+		    + ext4_ext_get_actual_len(last_ex))
+			EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
	}
+
	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
	if (err) {
@@ -3499,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
			i_size_write(inode, new_size);
		if (new_size > EXT4_I(inode)->i_disksize)
			ext4_update_i_disksize(inode, new_size);
+	} else {
+		/*
+		 * Mark that we allocate beyond EOF so the subsequent truncate
+		 * can proceed even if the new size is the same as i_size.
+		 */
+		if (new_size > i_size_read(inode))
+			EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
	}
 
 }
@@ -3603,7 +3730,7 @@ retry:
  * Returns 0 on success.
  */
 int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
-				    loff_t len)
+				    ssize_t len)
 {
	handle_t *handle;
	ext4_lblk_t block;
@@ -3635,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
		map_bh.b_state = 0;
		ret = ext4_get_blocks(handle, inode, block,
				      max_blocks, &map_bh,
-				      EXT4_GET_BLOCKS_DIO_CONVERT_EXT);
+				      EXT4_GET_BLOCKS_IO_CONVERT_EXT);
		if (ret <= 0) {
			WARN_ON(ret <= 0);
			printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3739,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
	int error = 0;
 
	/* in-inode? */
-	if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		struct ext4_iloc iloc;
		int offset;	/* offset of xattr in inode */
@@ -3767,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		__u64 start, __u64 len)
 {
	ext4_lblk_t start_blk;
-	ext4_lblk_t len_blks;
	int error = 0;
 
	/* fallback to generic here if not in extents fmt */
@@ -3781,8 +3907,14 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		error = ext4_xattr_fiemap(inode, fieinfo);
	} else {
+		ext4_lblk_t len_blks;
+		__u64 last_blk;
+
		start_blk = start >> inode->i_sb->s_blocksize_bits;
-		len_blks = len >> inode->i_sb->s_blocksize_bits;
+		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+		if (last_blk >= EXT_MAX_BLOCK)
+			last_blk = EXT_MAX_BLOCK-1;
+		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
		/*
		 * Walk the extent tree gathering extent information.
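
The fiemap hunk above stops computing the block count as a plain len >> blocksize_bits, which rounded down and could drop a trailing partial block; it now derives the last covered block from start + len - 1 and clamps it below EXT_MAX_BLOCK. A worked example with made-up numbers (4 KiB blocks, so s_blocksize_bits == 12):

	__u64 start = 4096;	/* byte offset: block 1 */
	__u64 len   = 4097;	/* touches blocks 1 and 2 */

	/* old: len >> 12 == 1, silently dropping block 2 */
	__u64 start_blk = start >> 12;			/* 1 */
	__u64 last_blk  = (start + len - 1) >> 12;	/* 8192 >> 12 == 2 */
	/* clamp as in the hunk, so the count cannot exceed the
	 * logical-block range:
	 * if (last_blk >= EXT_MAX_BLOCK) last_blk = EXT_MAX_BLOCK - 1; */
	__u64 len_blks  = last_blk - start_blk + 1;	/* 2 blocks */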

View File

@@ -35,9 +35,9 @@
  */
 static int ext4_release_file(struct inode *inode, struct file *filp)
 {
-	if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
+	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
-		EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
+		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&

View File

@@ -63,7 +63,7 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
	if (inode->i_sb->s_flags & MS_RDONLY)
		return 0;
 
-	ret = flush_aio_dio_completed_IO(inode);
+	ret = flush_completed_IO(inode);
	if (ret < 0)
		return ret;

View File

@@ -76,8 +76,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
	/* If checksum is bad mark all blocks and inodes use to prevent
	 * allocation, essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
-		ext4_error(sb, __func__, "Checksum bad for group %u",
-			   block_group);
+		ext4_error(sb, "Checksum bad for group %u", block_group);
		ext4_free_blks_set(sb, gdp, 0);
		ext4_free_inodes_set(sb, gdp, 0);
		ext4_itable_unused_set(sb, gdp, 0);
@@ -111,8 +110,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
	bitmap_blk = ext4_inode_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
-		ext4_error(sb, __func__,
-			    "Cannot read inode bitmap - "
+		ext4_error(sb, "Cannot read inode bitmap - "
			    "block_group = %u, inode_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
@@ -153,8 +151,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
	set_bitmap_uptodate(bh);
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
-		ext4_error(sb, __func__,
-			    "Cannot read inode bitmap - "
+		ext4_error(sb, "Cannot read inode bitmap - "
			    "block_group = %u, inode_bitmap = %llu",
			    block_group, bitmap_blk);
		return NULL;
@@ -229,8 +226,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 
	es = EXT4_SB(sb)->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
-		ext4_error(sb, "ext4_free_inode",
-			   "reserved or nonexistent inode %lu", ino);
+		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
		goto error_return;
	}
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
@@ -248,8 +244,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
	cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
					bit, bitmap_bh->b_data);
	if (!cleared)
-		ext4_error(sb, "ext4_free_inode",
-			   "bit already cleared for inode %lu", ino);
+		ext4_error(sb, "bit already cleared for inode %lu", ino);
	else {
		gdp = ext4_get_group_desc(sb, block_group, &bh2);
@@ -736,8 +731,7 @@ static int ext4_claim_inode(struct super_block *sb,
	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
			ino > EXT4_INODES_PER_GROUP(sb)) {
		ext4_unlock_group(sb, group);
-		ext4_error(sb, __func__,
-			   "reserved inode or inode > inodes count - "
+		ext4_error(sb, "reserved inode or inode > inodes count - "
			   "block_group = %u, inode=%lu", group,
			   ino + group * EXT4_INODES_PER_GROUP(sb));
		return 1;
@@ -904,7 +898,7 @@ repeat_in_this_group:
		BUFFER_TRACE(inode_bitmap_bh,
			     "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle,
-						 inode,
+						 NULL,
						 inode_bitmap_bh);
		if (err)
			goto fail;
@@ -1029,7 +1023,8 @@ got:
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
 
-	ei->i_state = EXT4_STATE_NEW;
+	ei->i_state_flags = 0;
+	ext4_set_inode_state(inode, EXT4_STATE_NEW);
 
	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
@@ -1098,8 +1093,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 
	/* Error cases - e2fsck has already cleaned up for us */
	if (ino > max_ino) {
-		ext4_warning(sb, __func__,
-			     "bad orphan ino %lu! e2fsck was run?", ino);
+		ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
		goto error;
	}
@@ -1107,8 +1101,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
	if (!bitmap_bh) {
-		ext4_warning(sb, __func__,
-			     "inode bitmap error for orphan %lu", ino);
+		ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
		goto error;
	}
@@ -1140,8 +1133,7 @@ iget_failed:
	err = PTR_ERR(inode);
	inode = NULL;
 bad_orphan:
-	ext4_warning(sb, __func__,
-		     "bad orphan inode %lu! e2fsck was run?", ino);
+	ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
	printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
	       bit, (unsigned long long)bitmap_bh->b_blocknr,
	       ext4_test_bit(bit, bitmap_bh->b_data));

View File

@@ -38,6 +38,7 @@
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
+#include <linux/kernel.h>
 
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -194,7 +195,7 @@ void ext4_delete_inode(struct inode *inode)
	inode->i_size = 0;
	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
-		ext4_warning(inode->i_sb, __func__,
+		ext4_warning(inode->i_sb,
			     "couldn't mark inode dirty (err %d)", err);
		goto stop_handle;
	}
@@ -212,7 +213,7 @@ void ext4_delete_inode(struct inode *inode)
		if (err > 0)
			err = ext4_journal_restart(handle, 3);
		if (err != 0) {
-			ext4_warning(inode->i_sb, __func__,
+			ext4_warning(inode->i_sb,
				     "couldn't extend journal (err %d)", err);
		stop_handle:
			ext4_journal_stop(handle);
@@ -323,8 +324,7 @@ static int ext4_block_to_path(struct inode *inode,
		offsets[n++] = i_block & (ptrs - 1);
		final = ptrs;
	} else {
-		ext4_warning(inode->i_sb, "ext4_block_to_path",
-				"block %lu > max in inode %lu",
+		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
				i_block + direct_blocks +
				indirect_blocks + double_blocks, inode->i_ino);
	}
@@ -344,7 +344,7 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
		if (blk &&
		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
						    blk, 1))) {
-			ext4_error(inode->i_sb, function,
+			__ext4_error(inode->i_sb, function,
				   "invalid block reference %u "
				   "in inode #%lu", blk, inode->i_ino);
			return -EIO;
@@ -607,7 +607,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
		if (*err)
			goto failed_out;
 
-		BUG_ON(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS);
+		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
+			EXT4_ERROR_INODE(inode,
+					 "current_block %llu + count %lu > %d!",
+					 current_block, count,
+					 EXT4_MAX_BLOCK_FILE_PHYS);
+			*err = -EIO;
+			goto failed_out;
+		}
 
		target -= count;
		/* allocate blocks for indirect blocks */
@@ -643,7 +650,14 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
		ar.flags = EXT4_MB_HINT_DATA;
 
	current_block = ext4_mb_new_blocks(handle, &ar, err);
-	BUG_ON(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS);
+	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
+		EXT4_ERROR_INODE(inode,
+				 "current_block %llu + ar.len %d > %d!",
+				 current_block, ar.len,
+				 EXT4_MAX_BLOCK_FILE_PHYS);
+		*err = -EIO;
+		goto failed_out;
+	}
 
	if (*err && (target == blks)) {
		/*
@@ -1061,6 +1075,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
	int mdb_free = 0, allocated_meta_blocks = 0;
 
	spin_lock(&ei->i_block_reservation_lock);
+	trace_ext4_da_update_reserve_space(inode, used);
	if (unlikely(used > ei->i_reserved_data_blocks)) {
		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
			 "with only %d reserved data blocks\n",
@@ -1124,7 +1139,7 @@ static int check_block_validity(struct inode *inode, const char *msg,
			       sector_t logical, sector_t phys, int len)
 {
	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-		ext4_error(inode->i_sb, msg,
+		__ext4_error(inode->i_sb, msg,
			   "inode #%lu logical block %llu mapped to %llu "
			   "(size %d)", inode->i_ino,
			   (unsigned long long) logical,
@@ -1306,7 +1321,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
			 * i_data's format changing.  Force the migrate
			 * to fail by clearing migrate flags
			 */
-			EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
+			ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
		}
 
		/*
@@ -1534,6 +1549,8 @@ static void ext4_truncate_failed_write(struct inode *inode)
	ext4_truncate(inode);
 }
 
+static int ext4_get_block_write(struct inode *inode, sector_t iblock,
+		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
@@ -1575,8 +1592,12 @@ retry:
	}
	*pagep = page;
 
-	ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-				ext4_get_block);
+	if (ext4_should_dioread_nolock(inode))
+		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
+				fsdata, ext4_get_block_write);
+	else
+		ret = block_write_begin(file, mapping, pos, len, flags, pagep,
+				fsdata, ext4_get_block);
 
	if (!ret && ext4_should_journal_data(inode)) {
		ret = walk_page_buffers(handle, page_buffers(page),
@@ -1793,7 +1814,7 @@ static int ext4_journalled_write_end(struct file *file,
	new_i_size = pos + copied;
	if (new_i_size > inode->i_size)
		i_size_write(inode, pos+copied);
-	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
	if (new_i_size > EXT4_I(inode)->i_disksize) {
		ext4_update_i_disksize(inode, new_i_size);
		ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -1846,6 +1867,7 @@ repeat:
	spin_lock(&ei->i_block_reservation_lock);
	md_reserved = ei->i_reserved_meta_blocks;
	md_needed = ext4_calc_metadata_amount(inode, lblock);
trace_ext4_da_reserve_space(inode, md_needed);
spin_unlock(&ei->i_block_reservation_lock); spin_unlock(&ei->i_block_reservation_lock);
/* /*
@ -2091,6 +2113,8 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
} else if (buffer_mapped(bh)) } else if (buffer_mapped(bh))
BUG_ON(bh->b_blocknr != pblock); BUG_ON(bh->b_blocknr != pblock);
if (buffer_uninit(exbh))
set_buffer_uninit(bh);
cur_logical++; cur_logical++;
pblock++; pblock++;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
@ -2133,17 +2157,16 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
break; break;
for (i = 0; i < nr_pages; i++) { for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i]; struct page *page = pvec.pages[i];
index = page->index; if (page->index > end)
if (index > end)
break; break;
index++;
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
block_invalidatepage(page, 0); block_invalidatepage(page, 0);
ClearPageUptodate(page); ClearPageUptodate(page);
unlock_page(page); unlock_page(page);
} }
index = pvec.pages[nr_pages - 1]->index + 1;
pagevec_release(&pvec);
} }
return; return;
} }
@ -2220,6 +2243,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
*/ */
new.b_state = 0; new.b_state = 0;
get_blocks_flags = EXT4_GET_BLOCKS_CREATE; get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
if (ext4_should_dioread_nolock(mpd->inode))
get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (mpd->b_state & (1 << BH_Delay)) if (mpd->b_state & (1 << BH_Delay))
get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
@ -2630,11 +2655,14 @@ static int __ext4_journalled_writepage(struct page *page,
ret = err; ret = err;
walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; ext4_set_inode_state(inode, EXT4_STATE_JDATA);
out: out:
return ret; return ret;
} }
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
/* /*
* Note that we don't need to start a transaction unless we're journaling data * Note that we don't need to start a transaction unless we're journaling data
* because we should have holes filled from ext4_page_mkwrite(). We even don't * because we should have holes filled from ext4_page_mkwrite(). We even don't
@ -2682,7 +2710,7 @@ static int ext4_writepage(struct page *page,
int ret = 0; int ret = 0;
loff_t size; loff_t size;
unsigned int len; unsigned int len;
struct buffer_head *page_bufs; struct buffer_head *page_bufs = NULL;
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
trace_ext4_writepage(inode, page); trace_ext4_writepage(inode, page);
@ -2758,7 +2786,11 @@ static int ext4_writepage(struct page *page,
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
ret = nobh_writepage(page, noalloc_get_block_write, wbc); ret = nobh_writepage(page, noalloc_get_block_write, wbc);
else else if (page_bufs && buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
} else
ret = block_write_full_page(page, noalloc_get_block_write, ret = block_write_full_page(page, noalloc_get_block_write,
wbc); wbc);
@ -3301,7 +3333,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
filemap_write_and_wait(mapping); filemap_write_and_wait(mapping);
} }
if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { if (EXT4_JOURNAL(inode) &&
ext4_test_inode_state(inode, EXT4_STATE_JDATA)) {
/* /*
* This is a REALLY heavyweight approach, but the use of * This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare: * bmap on dirty files is expected to be extremely rare:
@ -3320,7 +3353,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
* everything they get. * everything they get.
*/ */
EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
journal = EXT4_JOURNAL(inode); journal = EXT4_JOURNAL(inode);
jbd2_journal_lock_updates(journal); jbd2_journal_lock_updates(journal);
err = jbd2_journal_flush(journal); err = jbd2_journal_flush(journal);
@ -3345,10 +3378,44 @@ ext4_readpages(struct file *file, struct address_space *mapping,
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
} }
static void ext4_free_io_end(ext4_io_end_t *io)
{
BUG_ON(!io);
if (io->page)
put_page(io->page);
iput(io->inode);
kfree(io);
}
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
unsigned int curr_off = 0;
if (!page_has_buffers(page))
return;
head = bh = page_buffers(page);
do {
if (offset <= curr_off && test_clear_buffer_uninit(bh)
&& bh->b_private) {
ext4_free_io_end(bh->b_private);
bh->b_private = NULL;
bh->b_end_io = NULL;
}
curr_off = curr_off + bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
}
static void ext4_invalidatepage(struct page *page, unsigned long offset) static void ext4_invalidatepage(struct page *page, unsigned long offset)
{ {
journal_t *journal = EXT4_JOURNAL(page->mapping->host); journal_t *journal = EXT4_JOURNAL(page->mapping->host);
/*
* free any io_end structure allocated for buffers to be discarded
*/
if (ext4_should_dioread_nolock(page->mapping->host))
ext4_invalidatepage_free_endio(page, offset);
/* /*
* If it's a full truncate we just forget about the pending dirtying * If it's a full truncate we just forget about the pending dirtying
*/ */
@ -3420,7 +3487,14 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
} }
retry: retry:
ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, if (rw == READ && ext4_should_dioread_nolock(inode))
ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block, NULL);
else
ret = blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs, offset, nr_segs,
ext4_get_block, NULL); ext4_get_block, NULL);
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@ -3436,6 +3510,9 @@ retry:
* but cannot extend i_size. Bail out and pretend * but cannot extend i_size. Bail out and pretend
* the write failed... */ * the write failed... */
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
if (inode->i_nlink)
ext4_orphan_del(NULL, inode);
goto out; goto out;
} }
if (inode->i_nlink) if (inode->i_nlink)
@ -3463,75 +3540,63 @@ out:
return ret; return ret;
} }
static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, static int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
handle_t *handle = NULL; handle_t *handle = ext4_journal_current_handle();
int ret = 0; int ret = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
int dio_credits; int dio_credits;
int started = 0;
ext4_debug("ext4_get_block_dio_write: inode %lu, create flag %d\n", ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
inode->i_ino, create); inode->i_ino, create);
/* /*
* DIO VFS code passes create = 0 flag for write to * ext4_get_block in prepare for a DIO write or buffer write.
* the middle of file. It does this to avoid block * We allocate an uinitialized extent if blocks haven't been allocated.
* allocation for holes, to prevent expose stale data * The extent will be converted to initialized after IO complete.
* out when there is parallel buffered read (which does
* not hold the i_mutex lock) while direct IO write has
* not completed. DIO request on holes finally falls back
* to buffered IO for this reason.
*
* For ext4 extent based file, since we support fallocate,
* new allocated extent as uninitialized, for holes, we
* could fallocate blocks for holes, thus parallel
* buffered IO read will zero out the page when read on
* a hole while parallel DIO write to the hole has not completed.
*
* when we come here, we know it's a direct IO write to
* to the middle of file (<i_size)
* so it's safe to override the create flag from VFS.
*/ */
create = EXT4_GET_BLOCKS_DIO_CREATE_EXT; create = EXT4_GET_BLOCKS_IO_CREATE_EXT;
if (max_blocks > DIO_MAX_BLOCKS) if (!handle) {
max_blocks = DIO_MAX_BLOCKS; if (max_blocks > DIO_MAX_BLOCKS)
dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); max_blocks = DIO_MAX_BLOCKS;
handle = ext4_journal_start(inode, dio_credits); dio_credits = ext4_chunk_trans_blocks(inode, max_blocks);
if (IS_ERR(handle)) { handle = ext4_journal_start(inode, dio_credits);
ret = PTR_ERR(handle); if (IS_ERR(handle)) {
goto out; ret = PTR_ERR(handle);
goto out;
}
started = 1;
} }
ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
create); create);
if (ret > 0) { if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits); bh_result->b_size = (ret << inode->i_blkbits);
ret = 0; ret = 0;
} }
ext4_journal_stop(handle); if (started)
ext4_journal_stop(handle);
out: out:
return ret; return ret;
} }
static void ext4_free_io_end(ext4_io_end_t *io) static void dump_completed_IO(struct inode * inode)
{
BUG_ON(!io);
iput(io->inode);
kfree(io);
}
static void dump_aio_dio_list(struct inode * inode)
{ {
#ifdef EXT4_DEBUG #ifdef EXT4_DEBUG
struct list_head *cur, *before, *after; struct list_head *cur, *before, *after;
ext4_io_end_t *io, *io0, *io1; ext4_io_end_t *io, *io0, *io1;
unsigned long flags;
if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
ext4_debug("inode %lu aio dio list is empty\n", inode->i_ino); ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
return; return;
} }
ext4_debug("Dump inode %lu aio_dio_completed_IO list \n", inode->i_ino); ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
list_for_each_entry(io, &EXT4_I(inode)->i_aio_dio_complete_list, list){ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
cur = &io->list; cur = &io->list;
before = cur->prev; before = cur->prev;
io0 = container_of(before, ext4_io_end_t, list); io0 = container_of(before, ext4_io_end_t, list);
@ -3541,32 +3606,31 @@ static void dump_aio_dio_list(struct inode * inode)
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
io, inode->i_ino, io0, io1); io, inode->i_ino, io0, io1);
} }
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
#endif #endif
} }
/* /*
* check a range of space and convert unwritten extents to written. * check a range of space and convert unwritten extents to written.
*/ */
static int ext4_end_aio_dio_nolock(ext4_io_end_t *io) static int ext4_end_io_nolock(ext4_io_end_t *io)
{ {
struct inode *inode = io->inode; struct inode *inode = io->inode;
loff_t offset = io->offset; loff_t offset = io->offset;
size_t size = io->size; ssize_t size = io->size;
int ret = 0; int ret = 0;
ext4_debug("end_aio_dio_onlock: io 0x%p from inode %lu,list->next 0x%p," ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n", "list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev); io, inode->i_ino, io->list.next, io->list.prev);
if (list_empty(&io->list)) if (list_empty(&io->list))
return ret; return ret;
if (io->flag != DIO_AIO_UNWRITTEN) if (io->flag != EXT4_IO_UNWRITTEN)
return ret; return ret;
if (offset + size <= i_size_read(inode)) ret = ext4_convert_unwritten_extents(inode, offset, size);
ret = ext4_convert_unwritten_extents(inode, offset, size);
if (ret < 0) { if (ret < 0) {
printk(KERN_EMERG "%s: failed to convert unwritten" printk(KERN_EMERG "%s: failed to convert unwritten"
"extents to written extents, error is %d" "extents to written extents, error is %d"
@ -3579,50 +3643,64 @@ static int ext4_end_aio_dio_nolock(ext4_io_end_t *io)
io->flag = 0; io->flag = 0;
return ret; return ret;
} }
/* /*
* work on completed aio dio IO, to convert unwritten extents to extents * work on completed aio dio IO, to convert unwritten extents to extents
*/ */
static void ext4_end_aio_dio_work(struct work_struct *work) static void ext4_end_io_work(struct work_struct *work)
{ {
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
struct inode *inode = io->inode; struct inode *inode = io->inode;
int ret = 0; struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
ret = ext4_end_aio_dio_nolock(io); ret = ext4_end_io_nolock(io);
if (ret >= 0) { if (ret < 0) {
if (!list_empty(&io->list)) mutex_unlock(&inode->i_mutex);
list_del_init(&io->list); return;
ext4_free_io_end(io);
} }
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (!list_empty(&io->list))
list_del_init(&io->list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
ext4_free_io_end(io);
} }
/* /*
* This function is called from ext4_sync_file(). * This function is called from ext4_sync_file().
* *
* When AIO DIO IO is completed, the work to convert unwritten * When IO is completed, the work to convert unwritten extents to
* extents to written is queued on workqueue but may not get immediately * written is queued on workqueue but may not get immediately
* scheduled. When fsync is called, we need to ensure the * scheduled. When fsync is called, we need to ensure the
* conversion is complete before fsync returns. * conversion is complete before fsync returns.
* The inode keeps track of a list of completed AIO from DIO path * The inode keeps track of a list of pending/completed IO that
* that might needs to do the conversion. This function walks through * might needs to do the conversion. This function walks through
* the list and convert the related unwritten extents to written. * the list and convert the related unwritten extents for completed IO
* to written.
* The function return the number of pending IOs on success.
*/ */
int flush_aio_dio_completed_IO(struct inode *inode) int flush_completed_IO(struct inode *inode)
{ {
ext4_io_end_t *io; ext4_io_end_t *io;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned long flags;
int ret = 0; int ret = 0;
int ret2 = 0; int ret2 = 0;
if (list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)) if (list_empty(&ei->i_completed_io_list))
return ret; return ret;
dump_aio_dio_list(inode); dump_completed_IO(inode);
while (!list_empty(&EXT4_I(inode)->i_aio_dio_complete_list)){ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
io = list_entry(EXT4_I(inode)->i_aio_dio_complete_list.next, while (!list_empty(&ei->i_completed_io_list)){
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list); ext4_io_end_t, list);
/* /*
* Calling ext4_end_aio_dio_nolock() to convert completed * Calling ext4_end_io_nolock() to convert completed
* IO to written. * IO to written.
* *
* When ext4_sync_file() is called, run_queue() may already * When ext4_sync_file() is called, run_queue() may already
@ -3635,20 +3713,23 @@ int flush_aio_dio_completed_IO(struct inode *inode)
* avoid double converting from both fsync and background work * avoid double converting from both fsync and background work
* queue work. * queue work.
*/ */
ret = ext4_end_aio_dio_nolock(io); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
ret = ext4_end_io_nolock(io);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (ret < 0) if (ret < 0)
ret2 = ret; ret2 = ret;
else else
list_del_init(&io->list); list_del_init(&io->list);
} }
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0; return (ret2 < 0) ? ret2 : 0;
} }
static ext4_io_end_t *ext4_init_io_end (struct inode *inode) static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
{ {
ext4_io_end_t *io = NULL; ext4_io_end_t *io = NULL;
io = kmalloc(sizeof(*io), GFP_NOFS); io = kmalloc(sizeof(*io), flags);
if (io) { if (io) {
igrab(inode); igrab(inode);
@ -3656,8 +3737,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode)
io->flag = 0; io->flag = 0;
io->offset = 0; io->offset = 0;
io->size = 0; io->size = 0;
io->error = 0; io->page = NULL;
INIT_WORK(&io->work, ext4_end_aio_dio_work); INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list); INIT_LIST_HEAD(&io->list);
} }
@ -3669,6 +3750,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
{ {
ext4_io_end_t *io_end = iocb->private; ext4_io_end_t *io_end = iocb->private;
struct workqueue_struct *wq; struct workqueue_struct *wq;
unsigned long flags;
struct ext4_inode_info *ei;
/* if not async direct IO or dio with 0 bytes write, just return */ /* if not async direct IO or dio with 0 bytes write, just return */
if (!io_end || !size) if (!io_end || !size)
@ -3680,7 +3763,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
size); size);
/* if not aio dio with unwritten extents, just free io and return */ /* if not aio dio with unwritten extents, just free io and return */
if (io_end->flag != DIO_AIO_UNWRITTEN){ if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end); ext4_free_io_end(io_end);
iocb->private = NULL; iocb->private = NULL;
return; return;
@ -3688,16 +3771,85 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
io_end->offset = offset; io_end->offset = offset;
io_end->size = size; io_end->size = size;
io_end->flag = EXT4_IO_UNWRITTEN;
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */ /* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work); queue_work(wq, &io_end->work);
/* Add the io_end to per-inode completed aio dio list*/ /* Add the io_end to per-inode completed aio dio list*/
list_add_tail(&io_end->list, ei = EXT4_I(io_end->inode);
&EXT4_I(io_end->inode)->i_aio_dio_complete_list); spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL; iocb->private = NULL;
} }
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
{
ext4_io_end_t *io_end = bh->b_private;
struct workqueue_struct *wq;
struct inode *inode;
unsigned long flags;
if (!test_clear_buffer_uninit(bh) || !io_end)
goto out;
if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
printk("sb umounted, discard end_io request for inode %lu\n",
io_end->inode->i_ino);
ext4_free_io_end(io_end);
goto out;
}
io_end->flag = EXT4_IO_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
out:
bh->b_private = NULL;
bh->b_end_io = NULL;
clear_buffer_uninit(bh);
end_buffer_async_write(bh, uptodate);
}
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode)
{
ext4_io_end_t *io_end;
struct page *page = bh->b_page;
loff_t offset = (sector_t)page->index << PAGE_CACHE_SHIFT;
size_t size = bh->b_size;
retry:
io_end = ext4_init_io_end(inode, GFP_ATOMIC);
if (!io_end) {
if (printk_ratelimit())
printk(KERN_WARNING "%s: allocation fail\n", __func__);
schedule();
goto retry;
}
io_end->offset = offset;
io_end->size = size;
/*
* We need to hold a reference to the page to make sure it
* doesn't get evicted before ext4_end_io_work() has a chance
* to convert the extent from written to unwritten.
*/
io_end->page = page;
get_page(io_end->page);
bh->b_private = io_end;
bh->b_end_io = ext4_end_io_buffer_write;
return 0;
}
/* /*
* For ext4 extent files, ext4 will do direct-io write to holes, * For ext4 extent files, ext4 will do direct-io write to holes,
* preallocated extents, and those write extend the file, no need to * preallocated extents, and those write extend the file, no need to
@ -3751,7 +3903,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
iocb->private = NULL; iocb->private = NULL;
EXT4_I(inode)->cur_aio_dio = NULL; EXT4_I(inode)->cur_aio_dio = NULL;
if (!is_sync_kiocb(iocb)) { if (!is_sync_kiocb(iocb)) {
iocb->private = ext4_init_io_end(inode); iocb->private = ext4_init_io_end(inode, GFP_NOFS);
if (!iocb->private) if (!iocb->private)
return -ENOMEM; return -ENOMEM;
/* /*
@ -3767,7 +3919,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ret = blockdev_direct_IO(rw, iocb, inode, ret = blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov, inode->i_sb->s_bdev, iov,
offset, nr_segs, offset, nr_segs,
ext4_get_block_dio_write, ext4_get_block_write,
ext4_end_io_dio); ext4_end_io_dio);
if (iocb->private) if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL; EXT4_I(inode)->cur_aio_dio = NULL;
@ -3788,8 +3940,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
ext4_free_io_end(iocb->private); ext4_free_io_end(iocb->private);
iocb->private = NULL; iocb->private = NULL;
} else if (ret > 0 && (EXT4_I(inode)->i_state & } else if (ret > 0 && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) { EXT4_STATE_DIO_UNWRITTEN)) {
int err; int err;
/* /*
* for non AIO case, since the IO is already * for non AIO case, since the IO is already
@ -3799,7 +3951,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
offset, ret); offset, ret);
if (err < 0) if (err < 0)
ret = err; ret = err;
EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
} }
return ret; return ret;
} }
@ -4130,18 +4282,27 @@ no_top:
* We release `count' blocks on disk, but (last - first) may be greater * We release `count' blocks on disk, but (last - first) may be greater
* than `count' because there can be holes in there. * than `count' because there can be holes in there.
*/ */
static void ext4_clear_blocks(handle_t *handle, struct inode *inode, static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, struct buffer_head *bh,
ext4_fsblk_t block_to_free, ext4_fsblk_t block_to_free,
unsigned long count, __le32 *first, unsigned long count, __le32 *first,
__le32 *last) __le32 *last)
{ {
__le32 *p; __le32 *p;
int flags = EXT4_FREE_BLOCKS_FORGET; int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
flags |= EXT4_FREE_BLOCKS_METADATA; flags |= EXT4_FREE_BLOCKS_METADATA;
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
count)) {
ext4_error(inode->i_sb, "inode #%lu: "
"attempt to clear blocks %llu len %lu, invalid",
inode->i_ino, (unsigned long long) block_to_free,
count);
return 1;
}
if (try_to_extend_transaction(handle, inode)) { if (try_to_extend_transaction(handle, inode)) {
if (bh) { if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@ -4160,6 +4321,7 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
*p = 0; *p = 0;
ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
return 0;
} }
/** /**
@ -4215,9 +4377,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
} else if (nr == block_to_free + count) { } else if (nr == block_to_free + count) {
count++; count++;
} else { } else {
ext4_clear_blocks(handle, inode, this_bh, if (ext4_clear_blocks(handle, inode, this_bh,
block_to_free, block_to_free, count,
count, block_to_free_p, p); block_to_free_p, p))
break;
block_to_free = nr; block_to_free = nr;
block_to_free_p = p; block_to_free_p = p;
count = 1; count = 1;
@ -4241,7 +4404,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh)) if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
ext4_handle_dirty_metadata(handle, inode, this_bh); ext4_handle_dirty_metadata(handle, inode, this_bh);
else else
ext4_error(inode->i_sb, __func__, ext4_error(inode->i_sb,
"circular indirect block detected, " "circular indirect block detected, "
"inode=%lu, block=%llu", "inode=%lu, block=%llu",
inode->i_ino, inode->i_ino,
@ -4281,6 +4444,16 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr) if (!nr)
continue; /* A hole */ continue; /* A hole */
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
nr, 1)) {
ext4_error(inode->i_sb,
"indirect mapped block in inode "
"#%lu invalid (level %d, blk #%lu)",
inode->i_ino, depth,
(unsigned long) nr);
break;
}
/* Go read the buffer for the next level down */ /* Go read the buffer for the next level down */
bh = sb_bread(inode->i_sb, nr); bh = sb_bread(inode->i_sb, nr);
@ -4289,7 +4462,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* (should be rare). * (should be rare).
*/ */
if (!bh) { if (!bh) {
ext4_error(inode->i_sb, "ext4_free_branches", ext4_error(inode->i_sb,
"Read failure, inode=%lu, block=%llu", "Read failure, inode=%lu, block=%llu",
inode->i_ino, nr); inode->i_ino, nr);
continue; continue;
@ -4433,8 +4606,10 @@ void ext4_truncate(struct inode *inode)
if (!ext4_can_truncate(inode)) if (!ext4_can_truncate(inode))
return; return;
EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
ext4_ext_truncate(inode); ext4_ext_truncate(inode);
@ -4604,9 +4779,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block); bh = sb_getblk(sb, block);
if (!bh) { if (!bh) {
ext4_error(sb, "ext4_get_inode_loc", "unable to read " ext4_error(sb, "unable to read inode block - "
"inode block - inode=%lu, block=%llu", "inode=%lu, block=%llu", inode->i_ino, block);
inode->i_ino, block);
return -EIO; return -EIO;
} }
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
@ -4704,9 +4878,8 @@ make_io:
submit_bh(READ_META, bh); submit_bh(READ_META, bh);
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
ext4_error(sb, __func__, ext4_error(sb, "unable to read inode block - inode=%lu,"
"unable to read inode block - inode=%lu, " " block=%llu", inode->i_ino, block);
"block=%llu", inode->i_ino, block);
brelse(bh); brelse(bh);
return -EIO; return -EIO;
} }
@ -4720,7 +4893,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
{ {
/* We have all inode data except xattrs in memory here. */ /* We have all inode data except xattrs in memory here. */
return __ext4_get_inode_loc(inode, iloc, return __ext4_get_inode_loc(inode, iloc,
!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); !ext4_test_inode_state(inode, EXT4_STATE_XATTR));
} }
void ext4_set_inode_flags(struct inode *inode) void ext4_set_inode_flags(struct inode *inode)
@ -4814,7 +4987,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
} }
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
ei->i_state = 0; ei->i_state_flags = 0;
ei->i_dir_start_lookup = 0; ei->i_dir_start_lookup = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not. /* We now have enough fields to check if the inode was active or not.
@ -4897,7 +5070,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
EXT4_GOOD_OLD_INODE_SIZE + EXT4_GOOD_OLD_INODE_SIZE +
ei->i_extra_isize; ei->i_extra_isize;
if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC))
ei->i_state |= EXT4_STATE_XATTR; ext4_set_inode_state(inode, EXT4_STATE_XATTR);
} }
} else } else
ei->i_extra_isize = 0; ei->i_extra_isize = 0;
@ -4917,8 +5090,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0; ret = 0;
if (ei->i_file_acl && if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
ext4_error(sb, __func__, ext4_error(sb, "bad extended attribute block %llu inode #%lu",
"bad extended attribute block %llu in inode #%lu",
ei->i_file_acl, inode->i_ino); ei->i_file_acl, inode->i_ino);
ret = -EIO; ret = -EIO;
goto bad_inode; goto bad_inode;
@ -4964,8 +5136,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
} else { } else {
ret = -EIO; ret = -EIO;
ext4_error(inode->i_sb, __func__, ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
"bogus i_mode (%o) for inode=%lu",
inode->i_mode, inode->i_ino); inode->i_mode, inode->i_ino);
goto bad_inode; goto bad_inode;
} }
@ -5037,7 +5208,7 @@ static int ext4_do_update_inode(handle_t *handle,
/* For fields not not tracking in the in-memory inode, /* For fields not not tracking in the in-memory inode,
* initialise them to zero for new inodes. */ * initialise them to zero for new inodes. */
if (ei->i_state & EXT4_STATE_NEW) if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
ext4_get_inode_flags(ei); ext4_get_inode_flags(ei);
@ -5101,7 +5272,7 @@ static int ext4_do_update_inode(handle_t *handle,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE); EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1; sb->s_dirt = 1;
ext4_handle_sync(handle); ext4_handle_sync(handle);
err = ext4_handle_dirty_metadata(handle, inode, err = ext4_handle_dirty_metadata(handle, NULL,
EXT4_SB(sb)->s_sbh); EXT4_SB(sb)->s_sbh);
} }
} }
@ -5130,10 +5301,10 @@ static int ext4_do_update_inode(handle_t *handle,
} }
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, inode, bh); rc = ext4_handle_dirty_metadata(handle, NULL, bh);
if (!err) if (!err)
err = rc; err = rc;
ei->i_state &= ~EXT4_STATE_NEW; ext4_clear_inode_state(inode, EXT4_STATE_NEW);
ext4_update_inode_fsync_trans(handle, inode, 0); ext4_update_inode_fsync_trans(handle, inode, 0);
out_brelse: out_brelse:
@ -5204,10 +5375,8 @@ int ext4_write_inode(struct inode *inode, int wait)
if (wait) if (wait)
sync_dirty_buffer(iloc.bh); sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
ext4_error(inode->i_sb, __func__, ext4_error(inode->i_sb, "IO error syncing inode, "
"IO error syncing inode, " "inode=%lu, block=%llu", inode->i_ino,
"inode=%lu, block=%llu",
inode->i_ino,
(unsigned long long)iloc.bh->b_blocknr); (unsigned long long)iloc.bh->b_blocknr);
err = -EIO; err = -EIO;
} }
@ -5288,7 +5457,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} }
if (S_ISREG(inode->i_mode) && if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { attr->ia_valid & ATTR_SIZE &&
(attr->ia_size < inode->i_size ||
(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
handle_t *handle; handle_t *handle;
handle = ext4_journal_start(inode, 3); handle = ext4_journal_start(inode, 3);
@ -5319,6 +5490,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
goto err_out; goto err_out;
} }
} }
/* ext4_truncate will clear the flag */
if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
ext4_truncate(inode);
} }
rc = inode_setattr(inode, attr); rc = inode_setattr(inode, attr);
@ -5557,8 +5731,8 @@ static int ext4_expand_extra_isize(struct inode *inode,
entry = IFIRST(header); entry = IFIRST(header);
/* No extended attributes present */ /* No extended attributes present */
if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
new_extra_isize); new_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize; EXT4_I(inode)->i_extra_isize = new_extra_isize;
@ -5602,7 +5776,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
err = ext4_reserve_inode_write(handle, inode, &iloc); err = ext4_reserve_inode_write(handle, inode, &iloc);
if (ext4_handle_valid(handle) && if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
/* /*
* We need extra buffer credits since we may write into EA block * We need extra buffer credits since we may write into EA block
* with this same handle. If journal_extend fails, then it will * with this same handle. If journal_extend fails, then it will
@ -5616,10 +5790,11 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
sbi->s_want_extra_isize, sbi->s_want_extra_isize,
iloc, handle); iloc, handle);
if (ret) { if (ret) {
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; ext4_set_inode_state(inode,
EXT4_STATE_NO_EXPAND);
if (mnt_count != if (mnt_count !=
le16_to_cpu(sbi->s_es->s_mnt_count)) { le16_to_cpu(sbi->s_es->s_mnt_count)) {
ext4_warning(inode->i_sb, __func__, ext4_warning(inode->i_sb,
"Unable to expand inode %lu. Delete" "Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.", " some EAs or run e2fsck.",
inode->i_ino); inode->i_ino);
@ -5683,7 +5858,7 @@ static int ext4_pin_inode(handle_t *handle, struct inode *inode)
err = jbd2_journal_get_write_access(handle, iloc.bh); err = jbd2_journal_get_write_access(handle, iloc.bh);
if (!err) if (!err)
err = ext4_handle_dirty_metadata(handle, err = ext4_handle_dirty_metadata(handle,
inode, NULL,
iloc.bh); iloc.bh);
brelse(iloc.bh); brelse(iloc.bh);
} }

View File

@ -92,6 +92,15 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags &= ~EXT4_EXTENTS_FL; flags &= ~EXT4_EXTENTS_FL;
} }
if (flags & EXT4_EOFBLOCKS_FL) {
/* we don't support adding EOFBLOCKS flag */
if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
err = -EOPNOTSUPP;
goto flags_out;
}
} else if (oldflags & EXT4_EOFBLOCKS_FL)
ext4_truncate(inode);
handle = ext4_journal_start(inode, 1); handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
err = PTR_ERR(handle); err = PTR_ERR(handle);
@ -249,7 +258,8 @@ setversion_out:
if (me.moved_len > 0) if (me.moved_len > 0)
file_remove_suid(donor_filp); file_remove_suid(donor_filp);
if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) if (copy_to_user((struct move_extent __user *)arg,
&me, sizeof(me)))
err = -EFAULT; err = -EFAULT;
mext_out: mext_out:
fput(donor_filp); fput(donor_filp);

View File

@ -441,10 +441,9 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
ext4_fsblk_t blocknr; ext4_fsblk_t blocknr;
blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += first + i; blocknr += first + i;
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group,
__func__, "double-free of inode" __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %u)", " %lu's block %llu(bit %u in group %u)",
@ -1255,10 +1254,9 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) { if (!mb_test_bit(block, EXT4_MB_BITMAP(e4b))) {
ext4_fsblk_t blocknr; ext4_fsblk_t blocknr;
blocknr = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb);
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += block; blocknr += block;
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group,
__func__, "double-free of inode" __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %u)", " %lu's block %llu(bit %u in group %u)",
@ -1631,7 +1629,6 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
int max; int max;
int err; int err;
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_super_block *es = sbi->s_es;
struct ext4_free_extent ex; struct ext4_free_extent ex;
if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
@ -1648,8 +1645,8 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
ext4_fsblk_t start; ext4_fsblk_t start;
start = (e4b->bd_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + start = ext4_group_first_block_no(ac->ac_sb, e4b->bd_group) +
ex.fe_start + le32_to_cpu(es->s_first_data_block); ex.fe_start;
/* use do_div to get remainder (would be 64-bit modulo) */ /* use do_div to get remainder (would be 64-bit modulo) */
if (do_div(start, sbi->s_stripe) == 0) { if (do_div(start, sbi->s_stripe) == 0) {
ac->ac_found++; ac->ac_found++;
@ -1803,8 +1800,8 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
BUG_ON(sbi->s_stripe == 0); BUG_ON(sbi->s_stripe == 0);
/* find first stripe-aligned block in group */ /* find first stripe-aligned block in group */
first_group_block = e4b->bd_group * EXT4_BLOCKS_PER_GROUP(sb) first_group_block = ext4_group_first_block_no(sb, e4b->bd_group);
+ le32_to_cpu(sbi->s_es->s_first_data_block);
a = first_group_block + sbi->s_stripe - 1; a = first_group_block + sbi->s_stripe - 1;
do_div(a, sbi->s_stripe); do_div(a, sbi->s_stripe);
i = (a * sbi->s_stripe) - first_group_block; i = (a * sbi->s_stripe) - first_group_block;
@ -2256,7 +2253,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
init_rwsem(&meta_group_info[i]->alloc_sem); init_rwsem(&meta_group_info[i]->alloc_sem);
meta_group_info[i]->bb_free_root.rb_node = NULL; meta_group_info[i]->bb_free_root = RB_ROOT;
#ifdef DOUBLE_CHECK #ifdef DOUBLE_CHECK
{ {
@ -2560,12 +2557,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
ext4_unlock_group(sb, entry->group); ext4_unlock_group(sb, entry->group);
if (test_opt(sb, DISCARD)) { if (test_opt(sb, DISCARD)) {
ext4_fsblk_t discard_block; ext4_fsblk_t discard_block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
discard_block = (ext4_fsblk_t)entry->group * discard_block = entry->start_blk +
EXT4_BLOCKS_PER_GROUP(sb) ext4_group_first_block_no(sb, entry->group);
+ entry->start_blk
+ le32_to_cpu(es->s_first_data_block);
trace_ext4_discard_blocks(sb, trace_ext4_discard_blocks(sb,
(unsigned long long)discard_block, (unsigned long long)discard_block,
entry->count); entry->count);
@ -2703,14 +2697,11 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
if (err) if (err)
goto out_err; goto out_err;
block = ac->ac_b_ex.fe_group * EXT4_BLOCKS_PER_GROUP(sb) block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+ ac->ac_b_ex.fe_start
+ le32_to_cpu(es->s_first_data_block);
len = ac->ac_b_ex.fe_len; len = ac->ac_b_ex.fe_len;
if (!ext4_data_block_valid(sbi, block, len)) { if (!ext4_data_block_valid(sbi, block, len)) {
ext4_error(sb, __func__, ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"Allocating blocks %llu-%llu which overlap "
"fs metadata\n", block, block+len); "fs metadata\n", block, block+len);
/* File system mounted not to panic on error /* File system mounted not to panic on error
* Fix the bitmap and repeat the block allocation * Fix the bitmap and repeat the block allocation
@ -3161,9 +3152,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* The max size of hash table is PREALLOC_TB_SIZE */ /* The max size of hash table is PREALLOC_TB_SIZE */
order = PREALLOC_TB_SIZE - 1; order = PREALLOC_TB_SIZE - 1;
goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + goal_block = ext4_grp_offs_to_block(ac->ac_sb, &ac->ac_g_ex);
ac->ac_g_ex.fe_start +
le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block);
/* /*
* search for the prealloc space that is having * search for the prealloc space that is having
* minimal distance from the goal block. * minimal distance from the goal block.
@ -3526,8 +3515,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
if (bit >= end) if (bit >= end)
break; break;
next = mb_find_next_bit(bitmap_bh->b_data, end, bit); next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + start = ext4_group_first_block_no(sb, group) + bit;
le32_to_cpu(sbi->s_es->s_first_data_block);
mb_debug(1, " free preallocated %u/%u in group %u\n", mb_debug(1, " free preallocated %u/%u in group %u\n",
(unsigned) start, (unsigned) next - bit, (unsigned) start, (unsigned) next - bit,
(unsigned) group); (unsigned) group);
@ -3623,15 +3611,13 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
bitmap_bh = ext4_read_block_bitmap(sb, group); bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) { if (bitmap_bh == NULL) {
ext4_error(sb, __func__, "Error in reading block " ext4_error(sb, "Error reading block bitmap for %u", group);
"bitmap for %u", group);
return 0; return 0;
} }
err = ext4_mb_load_buddy(sb, group, &e4b); err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) { if (err) {
ext4_error(sb, __func__, "Error in loading buddy " ext4_error(sb, "Error loading buddy information for %u", group);
"information for %u", group);
put_bh(bitmap_bh); put_bh(bitmap_bh);
return 0; return 0;
} }
@ -3804,15 +3790,15 @@ repeat:
err = ext4_mb_load_buddy(sb, group, &e4b); err = ext4_mb_load_buddy(sb, group, &e4b);
if (err) { if (err) {
ext4_error(sb, __func__, "Error in loading buddy " ext4_error(sb, "Error loading buddy information for %u",
"information for %u", group); group);
continue; continue;
} }
bitmap_bh = ext4_read_block_bitmap(sb, group); bitmap_bh = ext4_read_block_bitmap(sb, group);
if (bitmap_bh == NULL) { if (bitmap_bh == NULL) {
ext4_error(sb, __func__, "Error in reading block " ext4_error(sb, "Error reading block bitmap for %u",
"bitmap for %u", group); group);
ext4_mb_release_desc(&e4b); ext4_mb_release_desc(&e4b);
continue; continue;
} }
@ -3938,7 +3924,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
/* don't use group allocation for large files */ /* don't use group allocation for large files */
size = max(size, isize); size = max(size, isize);
if (size >= sbi->s_mb_stream_request) { if (size > sbi->s_mb_stream_request) {
ac->ac_flags |= EXT4_MB_STREAM_ALLOC; ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
return; return;
} }
@ -4077,8 +4063,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
if (ext4_mb_load_buddy(sb, group, &e4b)) { if (ext4_mb_load_buddy(sb, group, &e4b)) {
ext4_error(sb, __func__, "Error in loading buddy " ext4_error(sb, "Error loading buddy information for %u",
"information for %u", group); group);
continue; continue;
} }
ext4_lock_group(sb, group); ext4_lock_group(sb, group);
@ -4476,10 +4462,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es; es = EXT4_SB(sb)->s_es;
if (!ext4_data_block_valid(sbi, block, count)) { if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
ext4_error(sb, __func__, !ext4_data_block_valid(sbi, block, count)) {
"Freeing blocks not in datazone - " ext4_error(sb, "Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count); "block = %llu, count = %lu", block, count);
goto error_return; goto error_return;
} }
@ -4547,8 +4533,7 @@ do_more:
in_range(block + count - 1, ext4_inode_table(sb, gdp), in_range(block + count - 1, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group)) { EXT4_SB(sb)->s_itb_per_group)) {
ext4_error(sb, __func__, ext4_error(sb, "Freeing blocks in system zone - "
"Freeing blocks in system zone - "
"Block = %llu, count = %lu", block, count); "Block = %llu, count = %lu", block, count);
/* err = 0. ext4_std_error should be a no op */ /* err = 0. ext4_std_error should be a no op */
goto error_return; goto error_return;

View File

@ -220,16 +220,9 @@ struct ext4_buddy {
#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
struct ext4_free_extent *fex) struct ext4_free_extent *fex)
{ {
ext4_fsblk_t block; return ext4_group_first_block_no(sb, fex->fe_group) + fex->fe_start;
block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
+ fex->fe_start
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
return block;
} }
#endif #endif

View File

@ -365,12 +365,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* happened after we started the migrate. We need to * happened after we started the migrate. We need to
* fail the migrate * fail the migrate
*/ */
if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) {
retval = -EAGAIN; retval = -EAGAIN;
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
goto err_out; goto err_out;
} else } else
EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
/* /*
* We have the extent map build with the tmp inode. * We have the extent map build with the tmp inode.
* Now copy the i_data across * Now copy the i_data across
@ -503,14 +503,10 @@ int ext4_ext_migrate(struct inode *inode)
} }
i_size_write(tmp_inode, i_size_read(inode)); i_size_write(tmp_inode, i_size_read(inode));
/* /*
* We don't want the inode to be reclaimed * Set the i_nlink to zero so it will be deleted later
* if we got interrupted in between. We have * when we drop inode reference.
* this tmp inode carrying reference to the
* data blocks of the original file. We set
* the i_nlink to zero at the last stage after
* switching the original file to extent format
*/ */
tmp_inode->i_nlink = 1; tmp_inode->i_nlink = 0;
ext4_ext_tree_init(handle, tmp_inode); ext4_ext_tree_init(handle, tmp_inode);
ext4_orphan_add(handle, tmp_inode); ext4_orphan_add(handle, tmp_inode);
@ -533,10 +529,20 @@ int ext4_ext_migrate(struct inode *inode)
* allocation. * allocation.
*/ */
down_read((&EXT4_I(inode)->i_data_sem)); down_read((&EXT4_I(inode)->i_data_sem));
EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
handle = ext4_journal_start(inode, 1); handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
/*
* It is impossible to update on-disk structures without
* a handle, so just rollback in-core changes and live other
* work to orphan_list_cleanup()
*/
ext4_orphan_del(NULL, tmp_inode);
retval = PTR_ERR(handle);
goto out;
}
ei = EXT4_I(inode); ei = EXT4_I(inode);
i_data = ei->i_data; i_data = ei->i_data;
@ -618,15 +624,8 @@ err_out:
/* Reset the extent details */ /* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode); ext4_ext_tree_init(handle, tmp_inode);
/*
* Set the i_nlink to zero so that
* generic_drop_inode really deletes the
* inode
*/
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle); ext4_journal_stop(handle);
out:
unlock_new_inode(tmp_inode); unlock_new_inode(tmp_inode);
iput(tmp_inode); iput(tmp_inode);

View File

@ -152,12 +152,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
int ret = 0; int ret = 0;
if (inode1 == NULL) { if (inode1 == NULL) {
ext4_error(inode2->i_sb, function, __ext4_error(inode2->i_sb, function,
"Both inodes should not be NULL: " "Both inodes should not be NULL: "
"inode1 NULL inode2 %lu", inode2->i_ino); "inode1 NULL inode2 %lu", inode2->i_ino);
ret = -EIO; ret = -EIO;
} else if (inode2 == NULL) { } else if (inode2 == NULL) {
ext4_error(inode1->i_sb, function, __ext4_error(inode1->i_sb, function,
"Both inodes should not be NULL: " "Both inodes should not be NULL: "
"inode1 %lu inode2 NULL", inode1->i_ino); "inode1 %lu inode2 NULL", inode1->i_ino);
ret = -EIO; ret = -EIO;
@ -252,6 +252,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
} }
o_start->ee_len = start_ext->ee_len; o_start->ee_len = start_ext->ee_len;
eblock = le32_to_cpu(start_ext->ee_block);
new_flag = 1; new_flag = 1;
} else if (start_ext->ee_len && new_ext->ee_len && } else if (start_ext->ee_len && new_ext->ee_len &&
@ -262,6 +263,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
* orig |------------------------------| * orig |------------------------------|
*/ */
o_start->ee_len = start_ext->ee_len; o_start->ee_len = start_ext->ee_len;
eblock = le32_to_cpu(start_ext->ee_block);
new_flag = 1; new_flag = 1;
} else if (!start_ext->ee_len && new_ext->ee_len && } else if (!start_ext->ee_len && new_ext->ee_len &&
@ -475,7 +477,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
struct ext4_extent *oext, *o_start, *o_end, *prev_ext; struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
struct ext4_extent new_ext, start_ext, end_ext; struct ext4_extent new_ext, start_ext, end_ext;
ext4_lblk_t new_ext_end; ext4_lblk_t new_ext_end;
ext4_fsblk_t new_phys_end;
int oext_alen, new_ext_alen, end_ext_alen; int oext_alen, new_ext_alen, end_ext_alen;
int depth = ext_depth(orig_inode); int depth = ext_depth(orig_inode);
int ret; int ret;
@ -489,7 +490,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
new_ext.ee_len = dext->ee_len; new_ext.ee_len = dext->ee_len;
new_ext_alen = ext4_ext_get_actual_len(&new_ext); new_ext_alen = ext4_ext_get_actual_len(&new_ext);
new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
/* /*
* Case: original extent is first * Case: original extent is first
@ -502,6 +502,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
le32_to_cpu(oext->ee_block) + oext_alen) { le32_to_cpu(oext->ee_block) + oext_alen) {
start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
le32_to_cpu(oext->ee_block)); le32_to_cpu(oext->ee_block));
start_ext.ee_block = oext->ee_block;
copy_extent_status(oext, &start_ext); copy_extent_status(oext, &start_ext);
} else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
prev_ext = oext - 1; prev_ext = oext - 1;
@ -515,6 +516,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
start_ext.ee_len = cpu_to_le16( start_ext.ee_len = cpu_to_le16(
ext4_ext_get_actual_len(prev_ext) + ext4_ext_get_actual_len(prev_ext) +
new_ext_alen); new_ext_alen);
start_ext.ee_block = oext->ee_block;
copy_extent_status(prev_ext, &start_ext); copy_extent_status(prev_ext, &start_ext);
new_ext.ee_len = 0; new_ext.ee_len = 0;
} }
@@ -526,7 +528,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
 	 * new_ext      |-------|
 	 */
 	if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
-		ext4_error(orig_inode->i_sb, __func__,
+		ext4_error(orig_inode->i_sb,
 			"new_ext_end(%u) should be less than or equal to "
 			"oext->ee_block(%u) + oext_alen(%d) - 1",
 			new_ext_end, le32_to_cpu(oext->ee_block),
@@ -689,12 +691,12 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 	while (1) {
 		/* The extent for donor must be found. */
 		if (!dext) {
-			ext4_error(donor_inode->i_sb, __func__,
+			ext4_error(donor_inode->i_sb,
 				   "The extent for donor must be found");
 			*err = -EIO;
 			goto out;
 		} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
-			ext4_error(donor_inode->i_sb, __func__,
+			ext4_error(donor_inode->i_sb,
 				"Donor offset(%u) and the first block of donor "
 				"extent(%u) should be equal",
 				donor_off,
@@ -928,7 +930,7 @@ out2:
 }
 
 /**
- * mext_check_argumants - Check whether move extent can be done
+ * mext_check_arguments - Check whether move extent can be done
  *
  * @orig_inode:		original inode
  * @donor_inode:	donor inode
@@ -949,14 +951,6 @@ mext_check_arguments(struct inode *orig_inode,
 	unsigned int blkbits = orig_inode->i_blkbits;
 	unsigned int blocksize = 1 << blkbits;
 
-	/* Regular file check */
-	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
-		ext4_debug("ext4 move extent: The argument files should be "
-			"regular file [ino:orig %lu, donor %lu]\n",
-			orig_inode->i_ino, donor_inode->i_ino);
-		return -EINVAL;
-	}
-
 	if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
 		ext4_debug("ext4 move extent: suid or sgid is set"
 			   " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1204,6 +1198,14 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		return -EINVAL;
 	}
 
+	/* Regular file check */
+	if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
+		ext4_debug("ext4 move extent: The argument files should be "
+			"regular file [ino:orig %lu, donor %lu]\n",
+			orig_inode->i_ino, donor_inode->i_ino);
+		return -EINVAL;
+	}
+
 	/* Protect orig and donor inodes against a truncate */
 	ret1 = mext_inode_double_lock(orig_inode, donor_inode);
 	if (ret1 < 0)
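Editor's note: the regular-file check moves out of mext_check_arguments() and ahead of mext_inode_double_lock(), so that non-regular inodes are rejected before any locks are taken. A userspace sketch of the same validate-then-lock ordering (the pthread mutexes and `move_extents()` signature are illustrative stand-ins, not kernel API):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/stat.h>

/* Hypothetical stand-ins for the two inodes' locks. */
static pthread_mutex_t orig_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t donor_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Reject non-regular files *before* taking any locks, mirroring the
 * hunk above that moves the S_ISREG() check ahead of
 * mext_inode_double_lock().
 */
static int move_extents(mode_t orig_mode, mode_t donor_mode)
{
    if (!S_ISREG(orig_mode) || !S_ISREG(donor_mode))
        return -EINVAL;  /* fail fast: nothing is locked yet */

    pthread_mutex_lock(&orig_lock);
    pthread_mutex_lock(&donor_lock);
    /* ... do the work under both locks ... */
    pthread_mutex_unlock(&donor_lock);
    pthread_mutex_unlock(&orig_lock);
    return 0;
}

int main(void)
{
    printf("dir as donor -> %d\n", move_extents(S_IFREG, S_IFDIR));
    printf("two regular  -> %d\n", move_extents(S_IFREG, S_IFREG));
    return 0;
}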
@@ -1351,7 +1353,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		if (ret1 < 0)
 			break;
 		if (*moved_len > len) {
-			ext4_error(orig_inode->i_sb, __func__,
+			ext4_error(orig_inode->i_sb,
 				"We replaced blocks too much! "
 				"sum of replaced: %llu requested: %llu",
 				*moved_len, len);


@@ -383,8 +383,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	if (root->info.hash_version != DX_HASH_TEA &&
 	    root->info.hash_version != DX_HASH_HALF_MD4 &&
 	    root->info.hash_version != DX_HASH_LEGACY) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unrecognised inode hash code %d",
+		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
 			     root->info.hash_version);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -399,8 +398,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	hash = hinfo->hash;
 
 	if (root->info.unused_flags & 1) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unimplemented inode hash flags: %#06x",
+		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
 			     root->info.unused_flags);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -408,8 +406,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	}
 
 	if ((indirect = root->info.indirect_levels) > 1) {
-		ext4_warning(dir->i_sb, __func__,
-			     "Unimplemented inode hash depth: %#06x",
+		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
 			     root->info.indirect_levels);
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
@@ -421,8 +418,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 
 	if (dx_get_limit(entries) != dx_root_limit(dir,
 						   root->info.info_length)) {
-		ext4_warning(dir->i_sb, __func__,
-			     "dx entry: limit != root limit");
+		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
 		brelse(bh);
 		*err = ERR_BAD_DX_DIR;
 		goto fail;
@@ -433,7 +429,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 	{
 		count = dx_get_count(entries);
 		if (!count || count > dx_get_limit(entries)) {
-			ext4_warning(dir->i_sb, __func__,
+			ext4_warning(dir->i_sb,
 				     "dx entry: no count or count > limit");
 			brelse(bh);
 			*err = ERR_BAD_DX_DIR;
@@ -478,7 +474,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
 			goto fail2;
 		at = entries = ((struct dx_node *) bh->b_data)->entries;
 		if (dx_get_limit(entries) != dx_node_limit (dir)) {
-			ext4_warning(dir->i_sb, __func__,
+			ext4_warning(dir->i_sb,
 				     "dx entry: limit != node limit");
 			brelse(bh);
 			*err = ERR_BAD_DX_DIR;
@@ -494,7 +490,7 @@ fail2:
 	}
 fail:
 	if (*err == ERR_BAD_DX_DIR)
-		ext4_warning(dir->i_sb, __func__,
+		ext4_warning(dir->i_sb,
 			     "Corrupt dir inode %ld, running e2fsck is "
 			     "recommended.", dir->i_ino);
 	return NULL;
@@ -947,9 +943,8 @@ restart:
 		wait_on_buffer(bh);
 		if (!buffer_uptodate(bh)) {
 			/* read error, skip block & hope for the best */
-			ext4_error(sb, __func__, "reading directory #%lu "
-				   "offset %lu", dir->i_ino,
-				   (unsigned long)block);
+			ext4_error(sb, "reading directory #%lu offset %lu",
+				   dir->i_ino, (unsigned long)block);
 			brelse(bh);
 			goto next;
 		}
@@ -1041,7 +1036,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
 		retval = ext4_htree_next_block(dir, hash, frame,
 					       frames, NULL);
 		if (retval < 0) {
-			ext4_warning(sb, __func__,
+			ext4_warning(sb,
 			     "error reading index page in directory #%lu",
 			     dir->i_ino);
 			*err = retval;
@@ -1071,14 +1066,13 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
 		__u32 ino = le32_to_cpu(de->inode);
 		brelse(bh);
 		if (!ext4_valid_inum(dir->i_sb, ino)) {
-			ext4_error(dir->i_sb, "ext4_lookup",
-				   "bad inode number: %u", ino);
+			ext4_error(dir->i_sb, "bad inode number: %u", ino);
 			return ERR_PTR(-EIO);
 		}
 		inode = ext4_iget(dir->i_sb, ino);
 		if (unlikely(IS_ERR(inode))) {
 			if (PTR_ERR(inode) == -ESTALE) {
-				ext4_error(dir->i_sb, __func__,
+				ext4_error(dir->i_sb,
 					   "deleted inode referenced: %u",
 					   ino);
 				return ERR_PTR(-EIO);
@@ -1110,7 +1104,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	brelse(bh);
 
 	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-		ext4_error(child->d_inode->i_sb, "ext4_get_parent",
+		ext4_error(child->d_inode->i_sb,
 			   "bad inode number: %u", ino);
 		return ERR_PTR(-EIO);
 	}
@@ -1410,7 +1404,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 	de = (struct ext4_dir_entry_2 *)((char *)fde +
 		ext4_rec_len_from_disk(fde->rec_len, blocksize));
 	if ((char *) de >= (((char *) root) + blocksize)) {
-		ext4_error(dir->i_sb, __func__,
+		ext4_error(dir->i_sb,
 			   "invalid rec_len for '..' in inode %lu",
 			   dir->i_ino);
 		brelse(bh);
@@ -1575,8 +1569,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
 
 			if (levels && (dx_get_count(frames->entries) ==
 				       dx_get_limit(frames->entries))) {
-				ext4_warning(sb, __func__,
-					     "Directory index full!");
+				ext4_warning(sb, "Directory index full!");
 				err = -ENOSPC;
 				goto cleanup;
 			}
@@ -1916,11 +1909,11 @@ static int empty_dir(struct inode *inode)
 	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
 	    !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
 		if (err)
-			ext4_error(inode->i_sb, __func__,
+			ext4_error(inode->i_sb,
 				   "error %d reading directory #%lu offset 0",
 				   err, inode->i_ino);
 		else
-			ext4_warning(inode->i_sb, __func__,
+			ext4_warning(inode->i_sb,
 				     "bad directory (dir #%lu) - no data block",
 				     inode->i_ino);
 		return 1;
@@ -1931,7 +1924,7 @@ static int empty_dir(struct inode *inode)
 	    !le32_to_cpu(de1->inode) ||
 	    strcmp(".", de->name) ||
 	    strcmp("..", de1->name)) {
-		ext4_warning(inode->i_sb, "empty_dir",
+		ext4_warning(inode->i_sb,
 			     "bad directory (dir #%lu) - no `.' or `..'",
 			     inode->i_ino);
 		brelse(bh);
@@ -1949,7 +1942,7 @@ static int empty_dir(struct inode *inode)
 				offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
 			if (!bh) {
 				if (err)
-					ext4_error(sb, __func__,
+					ext4_error(sb,
 						   "error %d reading directory"
 						   " #%lu offset %u",
 						   err, inode->i_ino, offset);
@@ -2020,11 +2013,18 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
 		goto out_unlock;
+	/*
+	 * Due to previous errors inode may be already a part of on-disk
+	 * orphan list. If so skip on-disk list modification.
+	 */
+	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
+		(le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
+			goto mem_insert;
 
 	/* Insert this inode at the head of the on-disk orphan list... */
 	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
 	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
-	err = ext4_handle_dirty_metadata(handle, inode, EXT4_SB(sb)->s_sbh);
+	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
 	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 	if (!err)
 		err = rc;
@@ -2037,6 +2037,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	 *
 	 * This is safe: on error we're going to ignore the orphan list
 	 * anyway on the next recovery. */
+mem_insert:
 	if (!err)
 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
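Editor's note: the ext4_orphan_add() hunk skips the on-disk update when the inode's next-orphan pointer already names a valid inode, which would otherwise re-insert it and loop the chain. A small userspace model of head insertion with that guard (toy types, not the kernel's; it models only the guarded case, where the next pointer is still set):

#include <stdint.h>
#include <stdio.h>

#define NINODES 8

/* Toy model: the superblock holds the list head, each inode a next ino. */
struct toy_sb    { uint32_t s_last_orphan; };
struct toy_inode { uint32_t i_ino; uint32_t next_orphan; };

static struct toy_sb sb;
static struct toy_inode inodes[NINODES];

/*
 * Insert at the head of the on-disk orphan list, unless the inode's
 * next pointer already names a valid inode - the guard the hunk above
 * adds, preventing a self-referencing loop after an earlier failed add.
 */
static void orphan_add(struct toy_inode *inode)
{
    if (inode->next_orphan && inode->next_orphan < NINODES)
        return;  /* already linked into the on-disk list */
    inode->next_orphan = sb.s_last_orphan;
    sb.s_last_orphan = inode->i_ino;
}

int main(void)
{
    for (uint32_t i = 1; i < NINODES; i++)
        inodes[i].i_ino = i;

    orphan_add(&inodes[3]);
    orphan_add(&inodes[5]);
    orphan_add(&inodes[5]);  /* points at inode 3 already: a no-op */

    for (uint32_t ino = sb.s_last_orphan; ino; ino = inodes[ino].next_orphan)
        printf("orphan %u\n", ino);  /* prints 5, then 3 */
    return 0;
}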
@@ -2096,7 +2097,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 		if (err)
 			goto out_brelse;
 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
-		err = ext4_handle_dirty_metadata(handle, inode, sbi->s_sbh);
+		err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
 	} else {
 		struct ext4_iloc iloc2;
 		struct inode *i_prev =
@@ -2163,7 +2164,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 	if (retval)
 		goto end_rmdir;
 	if (!EXT4_DIR_LINK_EMPTY(inode))
-		ext4_warning(inode->i_sb, "ext4_rmdir",
+		ext4_warning(inode->i_sb,
 			     "empty directory has too many links (%d)",
 			     inode->i_nlink);
 	inode->i_version++;
@@ -2215,7 +2216,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 		goto end_unlink;
 
 	if (!inode->i_nlink) {
-		ext4_warning(inode->i_sb, "ext4_unlink",
+		ext4_warning(inode->i_sb,
 			     "Deleting nonexistent file (%lu), %d",
 			     inode->i_ino, inode->i_nlink);
 		inode->i_nlink = 1;
@@ -2462,7 +2463,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		}
 	}
 	if (retval) {
-		ext4_warning(old_dir->i_sb, "ext4_rename",
+		ext4_warning(old_dir->i_sb,
 			     "Deleting old file (%lu), %d, error=%d",
 			     old_dir->i_ino, old_dir->i_nlink, retval);
 	}


@@ -48,65 +48,54 @@ static int verify_group_input(struct super_block *sb,
 
 	ext4_get_group_no_and_offset(sb, start, NULL, &offset);
 	if (group != sbi->s_groups_count)
-		ext4_warning(sb, __func__,
-			     "Cannot add at group %u (only %u groups)",
+		ext4_warning(sb, "Cannot add at group %u (only %u groups)",
 			     input->group, sbi->s_groups_count);
 	else if (offset != 0)
-		ext4_warning(sb, __func__, "Last group not full");
+		ext4_warning(sb, "Last group not full");
 	else if (input->reserved_blocks > input->blocks_count / 5)
-		ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
+		ext4_warning(sb, "Reserved blocks too high (%u)",
 			     input->reserved_blocks);
 	else if (free_blocks_count < 0)
-		ext4_warning(sb, __func__, "Bad blocks count %u",
+		ext4_warning(sb, "Bad blocks count %u",
 			     input->blocks_count);
 	else if (!(bh = sb_bread(sb, end - 1)))
-		ext4_warning(sb, __func__,
-			     "Cannot read last block (%llu)",
+		ext4_warning(sb, "Cannot read last block (%llu)",
 			     end - 1);
 	else if (outside(input->block_bitmap, start, end))
-		ext4_warning(sb, __func__,
-			     "Block bitmap not in group (block %llu)",
+		ext4_warning(sb, "Block bitmap not in group (block %llu)",
 			     (unsigned long long)input->block_bitmap);
 	else if (outside(input->inode_bitmap, start, end))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap not in group (block %llu)",
+		ext4_warning(sb, "Inode bitmap not in group (block %llu)",
 			     (unsigned long long)input->inode_bitmap);
 	else if (outside(input->inode_table, start, end) ||
 		 outside(itend - 1, start, end))
-		ext4_warning(sb, __func__,
-			     "Inode table not in group (blocks %llu-%llu)",
+		ext4_warning(sb, "Inode table not in group (blocks %llu-%llu)",
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (input->inode_bitmap == input->block_bitmap)
-		ext4_warning(sb, __func__,
-			     "Block bitmap same as inode bitmap (%llu)",
+		ext4_warning(sb, "Block bitmap same as inode bitmap (%llu)",
 			     (unsigned long long)input->block_bitmap);
 	else if (inside(input->block_bitmap, input->inode_table, itend))
-		ext4_warning(sb, __func__,
-			     "Block bitmap (%llu) in inode table (%llu-%llu)",
+		ext4_warning(sb, "Block bitmap (%llu) in inode table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->block_bitmap,
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->inode_bitmap, input->inode_table, itend))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap (%llu) in inode table (%llu-%llu)",
+		ext4_warning(sb, "Inode bitmap (%llu) in inode table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->inode_bitmap,
 			     (unsigned long long)input->inode_table, itend - 1);
 	else if (inside(input->block_bitmap, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Block bitmap (%llu) in GDT table"
-			     " (%llu-%llu)",
+		ext4_warning(sb, "Block bitmap (%llu) in GDT table (%llu-%llu)",
 			     (unsigned long long)input->block_bitmap,
 			     start, metaend - 1);
 	else if (inside(input->inode_bitmap, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Inode bitmap (%llu) in GDT table"
-			     " (%llu-%llu)",
+		ext4_warning(sb, "Inode bitmap (%llu) in GDT table (%llu-%llu)",
 			     (unsigned long long)input->inode_bitmap,
 			     start, metaend - 1);
 	else if (inside(input->inode_table, start, metaend) ||
 		 inside(itend - 1, start, metaend))
-		ext4_warning(sb, __func__,
-			     "Inode table (%llu-%llu) overlaps"
-			     "GDT table (%llu-%llu)",
+		ext4_warning(sb, "Inode table (%llu-%llu) overlaps GDT table "
+			     "(%llu-%llu)",
 			     (unsigned long long)input->inode_table,
 			     itend - 1, start, metaend - 1);
 	else
@@ -364,8 +353,7 @@ static int verify_reserved_gdb(struct super_block *sb,
 	while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
 		if (le32_to_cpu(*p++) !=
 		    grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
-			ext4_warning(sb, __func__,
-				     "reserved GDT %llu"
+			ext4_warning(sb, "reserved GDT %llu"
 				     " missing grp %d (%llu)",
 				     blk, grp,
 				     grp *
@@ -420,8 +408,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	 */
 	if (EXT4_SB(sb)->s_sbh->b_blocknr !=
 	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
-		ext4_warning(sb, __func__,
-			"won't resize using backup superblock at %llu",
+		ext4_warning(sb, "won't resize using backup superblock at %llu",
 			(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
 		return -EPERM;
 	}
@@ -444,8 +431,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 
 	data = (__le32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
-		ext4_warning(sb, __func__,
-			     "new group %u GDT block %llu not reserved",
+		ext4_warning(sb, "new group %u GDT block %llu not reserved",
 			     input->group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
@@ -468,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 			GFP_NOFS);
 	if (!n_group_desc) {
 		err = -ENOMEM;
-		ext4_warning(sb, __func__,
+		ext4_warning(sb,
 			     "not enough memory for %lu groups", gdb_num + 1);
 		goto exit_inode;
 	}
@@ -567,8 +553,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	/* Get each reserved primary GDT block and verify it holds backups */
 	for (res = 0; res < reserved_gdb; res++, blk++) {
 		if (le32_to_cpu(*data) != blk) {
-			ext4_warning(sb, __func__,
-				     "reserved block %llu"
+			ext4_warning(sb, "reserved block %llu"
 				     " not at offset %ld",
 				     blk,
 				     (long)(data - (__le32 *)dind->b_data));
@@ -713,8 +698,7 @@ static void update_backups(struct super_block *sb,
 	 */
 exit_err:
 	if (err) {
-		ext4_warning(sb, __func__,
-			     "can't update backup for group %u (err %d), "
+		ext4_warning(sb, "can't update backup for group %u (err %d), "
 			     "forcing fsck on next reboot", group, err);
 		sbi->s_mount_state &= ~EXT4_VALID_FS;
 		sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@@ -753,20 +737,19 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
 	if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
-		ext4_warning(sb, __func__,
-			     "Can't resize non-sparse filesystem further");
+		ext4_warning(sb, "Can't resize non-sparse filesystem further");
 		return -EPERM;
 	}
 
 	if (ext4_blocks_count(es) + input->blocks_count <
 	    ext4_blocks_count(es)) {
-		ext4_warning(sb, __func__, "blocks_count overflow");
+		ext4_warning(sb, "blocks_count overflow");
 		return -EINVAL;
 	}
 
 	if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
 	    le32_to_cpu(es->s_inodes_count)) {
-		ext4_warning(sb, __func__, "inodes_count overflow");
+		ext4_warning(sb, "inodes_count overflow");
 		return -EINVAL;
 	}
@@ -774,14 +757,13 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		if (!EXT4_HAS_COMPAT_FEATURE(sb,
					     EXT4_FEATURE_COMPAT_RESIZE_INODE)
		    || !le16_to_cpu(es->s_reserved_gdt_blocks)) {
-			ext4_warning(sb, __func__,
+			ext4_warning(sb,
				     "No reserved GDT blocks, can't resize");
			return -EPERM;
		}
		inode = ext4_iget(sb, EXT4_RESIZE_INO);
		if (IS_ERR(inode)) {
-			ext4_warning(sb, __func__,
-				     "Error opening resize inode");
+			ext4_warning(sb, "Error opening resize inode");
			return PTR_ERR(inode);
		}
	}
@@ -810,8 +792,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
	mutex_lock(&sbi->s_resize_lock);
	if (input->group != sbi->s_groups_count) {
-		ext4_warning(sb, __func__,
-			     "multiple resizers run on filesystem!");
+		ext4_warning(sb, "multiple resizers run on filesystem!");
		err = -EBUSY;
		goto exit_journal;
	}
@@ -997,13 +978,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
			" too large to resize to %llu blocks safely\n",
			sb->s_id, n_blocks_count);
		if (sizeof(sector_t) < 8)
-			ext4_warning(sb, __func__, "CONFIG_LBDAF not enabled");
+			ext4_warning(sb, "CONFIG_LBDAF not enabled");
		return -EINVAL;
	}
 
	if (n_blocks_count < o_blocks_count) {
-		ext4_warning(sb, __func__,
-			     "can't shrink FS - resize aborted");
+		ext4_warning(sb, "can't shrink FS - resize aborted");
		return -EBUSY;
	}
@@ -1011,15 +991,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
	ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
 
	if (last == 0) {
-		ext4_warning(sb, __func__,
-			     "need to use ext2online to resize further");
+		ext4_warning(sb, "need to use ext2online to resize further");
		return -EPERM;
	}
 
	add = EXT4_BLOCKS_PER_GROUP(sb) - last;
 
	if (o_blocks_count + add < o_blocks_count) {
-		ext4_warning(sb, __func__, "blocks_count overflow");
+		ext4_warning(sb, "blocks_count overflow");
		return -EINVAL;
	}
@@ -1027,16 +1006,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
		add = n_blocks_count - o_blocks_count;
 
	if (o_blocks_count + add < n_blocks_count)
-		ext4_warning(sb, __func__,
-			     "will only finish group (%llu"
-			     " blocks, %u new)",
+		ext4_warning(sb, "will only finish group (%llu blocks, %u new)",
			     o_blocks_count + add, add);
 
	/* See if the device is actually as big as what was requested */
	bh = sb_bread(sb, o_blocks_count + add - 1);
	if (!bh) {
-		ext4_warning(sb, __func__,
-			     "can't read last block, resize aborted");
+		ext4_warning(sb, "can't read last block, resize aborted");
		return -ENOSPC;
	}
	brelse(bh);
@@ -1047,14 +1023,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
	handle = ext4_journal_start_sb(sb, 3);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
-		ext4_warning(sb, __func__, "error %d on journal start", err);
+		ext4_warning(sb, "error %d on journal start", err);
		goto exit_put;
	}
 
	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
	if (o_blocks_count != ext4_blocks_count(es)) {
-		ext4_warning(sb, __func__,
-			     "multiple resizers run on filesystem!");
+		ext4_warning(sb, "multiple resizers run on filesystem!");
		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
		ext4_journal_stop(handle);
		err = -EBUSY;
@@ -1063,8 +1038,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
	if ((err = ext4_journal_get_write_access(handle,
						 EXT4_SB(sb)->s_sbh))) {
-		ext4_warning(sb, __func__,
-			     "error %d on journal write access", err);
+		ext4_warning(sb, "error %d on journal write access", err);
		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
		ext4_journal_stop(handle);
		goto exit_put;


@@ -333,7 +333,7 @@ static void ext4_handle_error(struct super_block *sb)
 		sb->s_id);
 }
 
-void ext4_error(struct super_block *sb, const char *function,
+void __ext4_error(struct super_block *sb, const char *function,
 		const char *fmt, ...)
 {
 	va_list args;
@@ -347,6 +347,42 @@ void ext4_error(struct super_block *sb, const char *function,
 	ext4_handle_error(sb);
 }
 
+void ext4_error_inode(const char *function, struct inode *inode,
+		      const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ",
+	       inode->i_sb->s_id, function, inode->i_ino, current->comm);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	ext4_handle_error(inode->i_sb);
+}
+
+void ext4_error_file(const char *function, struct file *file,
+		     const char *fmt, ...)
+{
+	va_list args;
+	struct inode *inode = file->f_dentry->d_inode;
+	char pathname[80], *path;
+
+	va_start(args, fmt);
+	path = d_path(&(file->f_path), pathname, sizeof(pathname));
+	if (!path)
+		path = "(unknown)";
+	printk(KERN_CRIT
+	       "EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ",
+	       inode->i_sb->s_id, function, inode->i_ino, current->comm, path);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	ext4_handle_error(inode->i_sb);
+}
+
 static const char *ext4_decode_error(struct super_block *sb, int errno,
 				     char nbuf[16])
 {
@@ -450,7 +486,7 @@ void ext4_msg (struct super_block * sb, const char *prefix,
 	va_end(args);
 }
 
-void ext4_warning(struct super_block *sb, const char *function,
+void __ext4_warning(struct super_block *sb, const char *function,
 		  const char *fmt, ...)
 {
 	va_list args;
@@ -507,7 +543,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
 		return;
 
-	ext4_warning(sb, __func__,
+	ext4_warning(sb,
 		     "updating to rev %d because of new feature flag, "
 		     "running e2fsck is recommended",
 		     EXT4_DYNAMIC_REV);
@@ -708,7 +744,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
 #endif
-	INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
+	INIT_LIST_HEAD(&ei->i_completed_io_list);
+	spin_lock_init(&ei->i_completed_io_lock);
 	ei->cur_aio_dio = NULL;
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
@@ -796,10 +833,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
 	if (sbi->s_qf_names[GRPQUOTA])
 		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
 
-	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
+	if (test_opt(sb, USRQUOTA))
 		seq_puts(seq, ",usrquota");
 
-	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
+	if (test_opt(sb, GRPQUOTA))
 		seq_puts(seq, ",grpquota");
 #endif
 }
@@ -926,6 +963,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (test_opt(sb, NOLOAD))
 		seq_puts(seq, ",norecovery");
 
+	if (test_opt(sb, DIOREAD_NOLOCK))
+		seq_puts(seq, ",dioread_nolock");
+
 	ext4_show_quota_options(seq, sb);
 
 	return 0;
@@ -1109,6 +1149,7 @@ enum {
 	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 	Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
+	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard,
 };
 
@@ -1176,6 +1217,8 @@ static const match_table_t tokens = {
 	{Opt_auto_da_alloc, "auto_da_alloc=%u"},
 	{Opt_auto_da_alloc, "auto_da_alloc"},
 	{Opt_noauto_da_alloc, "noauto_da_alloc"},
+	{Opt_dioread_nolock, "dioread_nolock"},
+	{Opt_dioread_lock, "dioread_lock"},
 	{Opt_discard, "discard"},
 	{Opt_nodiscard, "nodiscard"},
 	{Opt_err, NULL},
@@ -1205,6 +1248,66 @@ static ext4_fsblk_t get_sb_block(void **data)
 }
 
 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
+static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
+	"Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
+
+#ifdef CONFIG_QUOTA
+static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	char *qname;
+
+	if (sb_any_quota_loaded(sb) &&
+	    !sbi->s_qf_names[qtype]) {
+		ext4_msg(sb, KERN_ERR,
+			 "Cannot change journaled "
+			 "quota options when quota turned on");
+		return 0;
+	}
+	qname = match_strdup(args);
+	if (!qname) {
+		ext4_msg(sb, KERN_ERR,
+			 "Not enough memory for storing quotafile name");
+		return 0;
+	}
+	if (sbi->s_qf_names[qtype] &&
+	    strcmp(sbi->s_qf_names[qtype], qname)) {
+		ext4_msg(sb, KERN_ERR,
+			 "%s quota file already specified", QTYPE2NAME(qtype));
+		kfree(qname);
+		return 0;
+	}
+	sbi->s_qf_names[qtype] = qname;
+	if (strchr(sbi->s_qf_names[qtype], '/')) {
+		ext4_msg(sb, KERN_ERR,
+			 "quotafile must be on filesystem root");
+		kfree(sbi->s_qf_names[qtype]);
+		sbi->s_qf_names[qtype] = NULL;
+		return 0;
+	}
+	set_opt(sbi->s_mount_opt, QUOTA);
+	return 1;
+}
+
+static int clear_qf_name(struct super_block *sb, int qtype)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+	if (sb_any_quota_loaded(sb) &&
+	    sbi->s_qf_names[qtype]) {
+		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"
+			 " when quota turned on");
+		return 0;
+	}
+	/*
+	 * The space will be released later when all options are confirmed
+	 * to be correct
+	 */
+	sbi->s_qf_names[qtype] = NULL;
+	return 1;
+}
+#endif
 
 static int parse_options(char *options, struct super_block *sb,
 			 unsigned long *journal_devnum,
@@ -1217,8 +1320,7 @@ static int parse_options(char *options, struct super_block *sb,
 	int data_opt = 0;
 	int option;
 #ifdef CONFIG_QUOTA
-	int qtype, qfmt;
-	char *qname;
+	int qfmt;
 #endif
 
 	if (!options)
@@ -1229,19 +1331,31 @@ static int parse_options(char *options, struct super_block *sb,
 		if (!*p)
 			continue;
 
+		/*
+		 * Initialize args struct so we know whether arg was
+		 * found; some options take optional arguments.
+		 */
+		args[0].to = args[0].from = 0;
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_bsd_df:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			clear_opt(sbi->s_mount_opt, MINIX_DF);
 			break;
 		case Opt_minix_df:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			set_opt(sbi->s_mount_opt, MINIX_DF);
 			break;
 		case Opt_grpid:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			set_opt(sbi->s_mount_opt, GRPID);
 			break;
 		case Opt_nogrpid:
+			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38");
 			clear_opt(sbi->s_mount_opt, GRPID);
 			break;
 		case Opt_resuid:
 			if (match_int(&args[0], &option))
@@ -1378,14 +1492,13 @@ static int parse_options(char *options, struct super_block *sb,
 			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
 		datacheck:
 			if (is_remount) {
-				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
-						!= data_opt) {
+				if (test_opt(sb, DATA_FLAGS) != data_opt) {
 					ext4_msg(sb, KERN_ERR,
 						"Cannot change data mode on remount");
 					return 0;
 				}
 			} else {
-				sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
+				clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 				sbi->s_mount_opt |= data_opt;
 			}
 			break;
@@ -1397,63 +1510,22 @@ static int parse_options(char *options, struct super_block *sb,
 			break;
 #ifdef CONFIG_QUOTA
 		case Opt_usrjquota:
-			qtype = USRQUOTA;
-			goto set_qf_name;
+			if (!set_qf_name(sb, USRQUOTA, &args[0]))
+				return 0;
+			break;
 		case Opt_grpjquota:
-			qtype = GRPQUOTA;
-set_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    !sbi->s_qf_names[qtype]) {
-				ext4_msg(sb, KERN_ERR,
-				       "Cannot change journaled "
-				       "quota options when quota turned on");
+			if (!set_qf_name(sb, GRPQUOTA, &args[0]))
 				return 0;
-			}
-			qname = match_strdup(&args[0]);
-			if (!qname) {
-				ext4_msg(sb, KERN_ERR,
-					"Not enough memory for "
-					"storing quotafile name");
-				return 0;
-			}
-			if (sbi->s_qf_names[qtype] &&
-			    strcmp(sbi->s_qf_names[qtype], qname)) {
-				ext4_msg(sb, KERN_ERR,
-					"%s quota file already "
-					"specified", QTYPE2NAME(qtype));
-				kfree(qname);
-				return 0;
-			}
-			sbi->s_qf_names[qtype] = qname;
-			if (strchr(sbi->s_qf_names[qtype], '/')) {
-				ext4_msg(sb, KERN_ERR,
-					"quotafile must be on "
-					"filesystem root");
-				kfree(sbi->s_qf_names[qtype]);
-				sbi->s_qf_names[qtype] = NULL;
-				return 0;
-			}
-			set_opt(sbi->s_mount_opt, QUOTA);
 			break;
 		case Opt_offusrjquota:
-			qtype = USRQUOTA;
-			goto clear_qf_name;
+			if (!clear_qf_name(sb, USRQUOTA))
+				return 0;
+			break;
 		case Opt_offgrpjquota:
-			qtype = GRPQUOTA;
-clear_qf_name:
-			if (sb_any_quota_loaded(sb) &&
-			    sbi->s_qf_names[qtype]) {
-				ext4_msg(sb, KERN_ERR, "Cannot change "
-					"journaled quota options when "
-					"quota turned on");
+			if (!clear_qf_name(sb, GRPQUOTA))
 				return 0;
-			}
-			/*
-			 * The space will be released later when all options
-			 * are confirmed to be correct
-			 */
-			sbi->s_qf_names[qtype] = NULL;
 			break;
 		case Opt_jqfmt_vfsold:
 			qfmt = QFMT_VFS_OLD;
 			goto set_qf_format;
@@ -1518,10 +1590,11 @@ set_qf_format:
 			clear_opt(sbi->s_mount_opt, BARRIER);
 			break;
 		case Opt_barrier:
-			if (match_int(&args[0], &option)) {
-				set_opt(sbi->s_mount_opt, BARRIER);
-				break;
-			}
+			if (args[0].from) {
+				if (match_int(&args[0], &option))
+					return 0;
+			} else
+				option = 1;	/* No argument, default to 1 */
 			if (option)
 				set_opt(sbi->s_mount_opt, BARRIER);
 			else
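Editor's note: the Opt_barrier change makes both "barrier" and "barrier=N" valid by pre-clearing args[0] and treating a missing argument as 1, while a malformed value now fails the parse instead of being silently treated as "on". A userspace sketch of the same optional-argument pattern (plain strtol in place of the kernel's match_int):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Parse "barrier" or "barrier=<int>". Returns 0 on success and stores
 * the value; returns -1 on a malformed argument, mirroring the hunk
 * above where a bad integer now rejects the option rather than
 * defaulting it.
 */
static int parse_barrier(const char *opt, long *value)
{
    const char *arg = strchr(opt, '=');
    char *end;

    if (!arg) {          /* no argument: default to 1 */
        *value = 1;
        return 0;
    }
    *value = strtol(arg + 1, &end, 10);
    if (end == arg + 1 || *end != '\0')
        return -1;       /* malformed: reject, don't guess */
    return 0;
}

int main(void)
{
    const char *tests[] = { "barrier", "barrier=0", "barrier=1", "barrier=x" };

    for (unsigned i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        long v;
        if (parse_barrier(tests[i], &v) == 0)
            printf("%-10s -> %ld\n", tests[i], v);
        else
            printf("%-10s -> rejected\n", tests[i]);
    }
    return 0;
}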
@@ -1594,10 +1667,11 @@ set_qf_format:
 			set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 			break;
 		case Opt_auto_da_alloc:
-			if (match_int(&args[0], &option)) {
-				clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
-				break;
-			}
+			if (args[0].from) {
+				if (match_int(&args[0], &option))
+					return 0;
+			} else
+				option = 1;	/* No argument, default to 1 */
 			if (option)
 				clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
 			else
@@ -1609,6 +1683,12 @@ set_qf_format:
 		case Opt_nodiscard:
 			clear_opt(sbi->s_mount_opt, DISCARD);
 			break;
+		case Opt_dioread_nolock:
+			set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+			break;
+		case Opt_dioread_lock:
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+			break;
 		default:
 			ext4_msg(sb, KERN_ERR,
 			       "Unrecognized mount option \"%s\" "
@@ -1618,18 +1698,13 @@ set_qf_format:
 	}
 #ifdef CONFIG_QUOTA
 	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
-		if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
-		     sbi->s_qf_names[USRQUOTA])
+		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
 			clear_opt(sbi->s_mount_opt, USRQUOTA);
 
-		if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
-		     sbi->s_qf_names[GRPQUOTA])
+		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
 			clear_opt(sbi->s_mount_opt, GRPQUOTA);
 
-		if ((sbi->s_qf_names[USRQUOTA] &&
-		     (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
-		    (sbi->s_qf_names[GRPQUOTA] &&
-		     (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
+		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {
 			ext4_msg(sb, KERN_ERR, "old and new quota "
 				 "format mixing");
 			return 0;
@@ -2432,8 +2507,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 	if (def_mount_opts & EXT4_DEFM_DEBUG)
 		set_opt(sbi->s_mount_opt, DEBUG);
-	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
+	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
+		ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups",
+			 "2.6.38");
 		set_opt(sbi->s_mount_opt, GRPID);
+	}
 	if (def_mount_opts & EXT4_DEFM_UID16)
 		set_opt(sbi->s_mount_opt, NO_UID32);
 #ifdef CONFIG_EXT4_FS_XATTR
@@ -2445,11 +2523,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		set_opt(sbi->s_mount_opt, POSIX_ACL);
 #endif
 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
-		sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
+		set_opt(sbi->s_mount_opt, JOURNAL_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
-		sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
+		set_opt(sbi->s_mount_opt, ORDERED_DATA);
 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
-		sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
+		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
 
 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -2477,7 +2555,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		goto failed_mount;
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
@@ -2766,7 +2844,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		    EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
 			ext4_msg(sb, KERN_ERR, "required journal recovery "
 			       "suppressed and not mounted read-only");
-			goto failed_mount4;
+			goto failed_mount_wq;
 		} else {
 			clear_opt(sbi->s_mount_opt, DATA_FLAGS);
 			set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
@@ -2779,7 +2857,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
 		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
-		goto failed_mount4;
+		goto failed_mount_wq;
 	}
 
 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
@@ -2818,7 +2896,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
 			ext4_msg(sb, KERN_ERR, "Journal does not support "
 			       "requested data journaling mode");
-			goto failed_mount4;
+			goto failed_mount_wq;
 		}
 	default:
 		break;
@@ -2826,13 +2904,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
 no_journal:
 	if (test_opt(sb, NOBH)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
 			ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
 				"its supported only with writeback mode");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
+		if (test_opt(sb, DIOREAD_NOLOCK)) {
+			ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
+				"not supported with nobh mode");
+			goto failed_mount_wq;
+		}
 	}
 	EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
@@ -2897,6 +2979,18 @@ no_journal:
 			 "requested data journaling mode");
 		clear_opt(sbi->s_mount_opt, DELALLOC);
 	}
+	if (test_opt(sb, DIOREAD_NOLOCK)) {
+		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
+				"option - requested data journaling mode");
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		}
+		if (sb->s_blocksize < PAGE_SIZE) {
+			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock "
+				"option - block size is too small");
+			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
+		}
+	}
 
 	err = ext4_setup_system_zone(sb);
 	if (err) {
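Editor's note: the new dioread_nolock option is only honored when compatible with the rest of the mount: it is rejected under nobh, and dropped under data=journal or when the block size is smaller than the page size. A userspace sketch of the same constraint check (the option struct and enum are hypothetical, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

enum data_mode { DATA_ORDERED, DATA_WRITEBACK, DATA_JOURNAL };

struct mount_opts {
    bool dioread_nolock;
    enum data_mode data;
    unsigned int blocksize;
    unsigned int pagesize;
};

/*
 * Drop dioread_nolock when it cannot work, mirroring the checks the
 * hunk above adds after journal setup: data journaling and sub-page
 * block sizes both disable it.
 */
static void validate_dioread_nolock(struct mount_opts *o)
{
    if (!o->dioread_nolock)
        return;
    if (o->data == DATA_JOURNAL) {
        fprintf(stderr, "ignoring dioread_nolock: data=journal\n");
        o->dioread_nolock = false;
    }
    if (o->blocksize < o->pagesize) {
        fprintf(stderr, "ignoring dioread_nolock: block size too small\n");
        o->dioread_nolock = false;
    }
}

int main(void)
{
    struct mount_opts o = { true, DATA_ORDERED, 1024, 4096 };

    validate_dioread_nolock(&o);
    printf("dioread_nolock = %d\n", o.dioread_nolock);  /* 0: 1k < 4k */
    return 0;
}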
@@ -3360,10 +3454,9 @@ static void ext4_clear_journal_err(struct super_block *sb,
 		char nbuf[16];
 
 		errstr = ext4_decode_error(sb, j_errno, nbuf);
-		ext4_warning(sb, __func__, "Filesystem error recorded "
+		ext4_warning(sb, "Filesystem error recorded "
 			     "from previous mount: %s", errstr);
-		ext4_warning(sb, __func__, "Marking fs in need of "
-			     "filesystem check.");
+		ext4_warning(sb, "Marking fs in need of filesystem check.");
 
 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -3514,7 +3607,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 		ext4_abort(sb, __func__, "Abort forced by user");
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
+		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
 	es = sbi->s_es;
 
@@ -3917,9 +4010,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
 	int err = 0;
 	int offset = off & (sb->s_blocksize - 1);
-	int tocopy;
 	int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
-	size_t towrite = len;
 	struct buffer_head *bh;
 	handle_t *handle = journal_current_handle();
 
@@ -3929,52 +4020,53 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 			(unsigned long long)off, (unsigned long long)len);
 		return -EIO;
 	}
+	/*
+	 * Since we account only one data block in transaction credits,
+	 * then it is impossible to cross a block boundary.
+	 */
+	if (sb->s_blocksize - offset < len) {
+		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
+			" cancelled because not block aligned",
+			(unsigned long long)off, (unsigned long long)len);
+		return -EIO;
+	}
 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
-	while (towrite > 0) {
-		tocopy = sb->s_blocksize - offset < towrite ?
-				sb->s_blocksize - offset : towrite;
-		bh = ext4_bread(handle, inode, blk, 1, &err);
-		if (!bh)
+	bh = ext4_bread(handle, inode, blk, 1, &err);
+	if (!bh)
+		goto out;
+	if (journal_quota) {
+		err = ext4_journal_get_write_access(handle, bh);
+		if (err) {
+			brelse(bh);
 			goto out;
-		if (journal_quota) {
-			err = ext4_journal_get_write_access(handle, bh);
-			if (err) {
-				brelse(bh);
-				goto out;
-			}
 		}
-		lock_buffer(bh);
-		memcpy(bh->b_data+offset, data, tocopy);
-		flush_dcache_page(bh->b_page);
-		unlock_buffer(bh);
-		if (journal_quota)
-			err = ext4_handle_dirty_metadata(handle, NULL, bh);
-		else {
-			/* Always do at least ordered writes for quotas */
-			err = ext4_jbd2_file_inode(handle, inode);
-			mark_buffer_dirty(bh);
-		}
-		brelse(bh);
-		if (err)
-			goto out;
-		offset = 0;
-		towrite -= tocopy;
-		data += tocopy;
-		blk++;
 	}
+	lock_buffer(bh);
+	memcpy(bh->b_data+offset, data, len);
+	flush_dcache_page(bh->b_page);
+	unlock_buffer(bh);
+	if (journal_quota)
+		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+	else {
+		/* Always do at least ordered writes for quotas */
+		err = ext4_jbd2_file_inode(handle, inode);
+		mark_buffer_dirty(bh);
+	}
+	brelse(bh);
 out:
-	if (len == towrite) {
+	if (err) {
 		mutex_unlock(&inode->i_mutex);
 		return err;
 	}
-	if (inode->i_size < off+len-towrite) {
-		i_size_write(inode, off+len-towrite);
+	if (inode->i_size < off + len) {
+		i_size_write(inode, off + len);
 		EXT4_I(inode)->i_disksize = inode->i_size;
 	}
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 	ext4_mark_inode_dirty(handle, inode);
 	mutex_unlock(&inode->i_mutex);
-	return len - towrite;
+	return len;
 }
 #endif
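Editor's note: ext4_quota_write() now refuses writes that would cross a block boundary, because the transaction reserved credits for exactly one data block; the old multi-block loop is gone. The boundary arithmetic is easy to model in plain C (assuming, as on ext4, that the block size is a power of two):

#include <stdio.h>

/*
 * Return 1 when a write of len bytes at byte offset off stays inside a
 * single blocksize-sized block - the condition the rewritten
 * ext4_quota_write() enforces before touching the buffer.
 */
static int fits_in_one_block(unsigned long long off, unsigned long long len,
                             unsigned int blocksize)
{
    unsigned int offset = off & (blocksize - 1);  /* offset within block */

    return blocksize - offset >= len;
}

int main(void)
{
    /* 4 KiB blocks: a 100-byte record at offset 4000 crosses a boundary. */
    printf("%d\n", fits_in_one_block(4000, 100, 4096));  /* 0: rejected */
    printf("%d\n", fits_in_one_block(4096, 100, 4096));  /* 1: next block */
    return 0;
}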


@ -227,7 +227,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) { if (ext4_xattr_check_block(bh)) {
bad_block: ext4_error(inode->i_sb, __func__, bad_block:
ext4_error(inode->i_sb,
"inode %lu: bad block %llu", inode->i_ino, "inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl); EXT4_I(inode)->i_file_acl);
error = -EIO; error = -EIO;
@ -267,7 +268,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
void *end; void *end;
int error; int error;
if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
return -ENODATA; return -ENODATA;
error = ext4_get_inode_loc(inode, &iloc); error = ext4_get_inode_loc(inode, &iloc);
if (error) if (error)
@ -371,7 +372,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
ea_bdebug(bh, "b_count=%d, refcount=%d", ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) { if (ext4_xattr_check_block(bh)) {
ext4_error(inode->i_sb, __func__, ext4_error(inode->i_sb,
"inode %lu: bad block %llu", inode->i_ino, "inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl); EXT4_I(inode)->i_file_acl);
error = -EIO; error = -EIO;
@ -396,7 +397,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
void *end; void *end;
int error; int error;
if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
return 0; return 0;
error = ext4_get_inode_loc(inode, &iloc); error = ext4_get_inode_loc(inode, &iloc);
if (error) if (error)
@ -665,9 +666,8 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
atomic_read(&(bs->bh->b_count)), atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount)); le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext4_xattr_check_block(bs->bh)) { if (ext4_xattr_check_block(bs->bh)) {
ext4_error(sb, __func__, ext4_error(sb, "inode %lu: bad block %llu",
"inode %lu: bad block %llu", inode->i_ino, inode->i_ino, EXT4_I(inode)->i_file_acl);
EXT4_I(inode)->i_file_acl);
error = -EIO; error = -EIO;
goto cleanup; goto cleanup;
} }
@ -880,9 +880,8 @@ cleanup_dquot:
goto cleanup; goto cleanup;
bad_block: bad_block:
ext4_error(inode->i_sb, __func__, ext4_error(inode->i_sb, "inode %lu: bad block %llu",
"inode %lu: bad block %llu", inode->i_ino, inode->i_ino, EXT4_I(inode)->i_file_acl);
EXT4_I(inode)->i_file_acl);
goto cleanup; goto cleanup;
#undef header #undef header
@ -908,7 +907,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.base = is->s.first = IFIRST(header); is->s.base = is->s.first = IFIRST(header);
is->s.here = is->s.first; is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
error = ext4_xattr_check_names(IFIRST(header), is->s.end); error = ext4_xattr_check_names(IFIRST(header), is->s.end);
if (error) if (error)
return error; return error;
@ -940,10 +939,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header = IHDR(inode, ext4_raw_inode(&is->iloc)); header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) { if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; ext4_set_inode_state(inode, EXT4_STATE_XATTR);
} else { } else {
header->h_magic = cpu_to_le32(0); header->h_magic = cpu_to_le32(0);
EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
} }
return 0; return 0;
} }
@ -986,8 +985,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (strlen(name) > 255) if (strlen(name) > 255)
return -ERANGE; return -ERANGE;
down_write(&EXT4_I(inode)->xattr_sem); down_write(&EXT4_I(inode)->xattr_sem);
no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
error = ext4_get_inode_loc(inode, &is.iloc); error = ext4_get_inode_loc(inode, &is.iloc);
if (error) if (error)
@ -997,10 +996,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (error) if (error)
goto cleanup; goto cleanup;
if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; ext4_clear_inode_state(inode, EXT4_STATE_NEW);
} }
error = ext4_xattr_ibody_find(inode, &i, &is); error = ext4_xattr_ibody_find(inode, &i, &is);
@ -1052,7 +1051,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
ext4_xattr_update_super_block(handle, inode->i_sb); ext4_xattr_update_super_block(handle, inode->i_sb);
inode->i_ctime = ext4_current_time(inode); inode->i_ctime = ext4_current_time(inode);
if (!value) if (!value)
EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/* /*
* The bh is consumed by ext4_mark_iloc_dirty, even with * The bh is consumed by ext4_mark_iloc_dirty, even with
@@ -1067,7 +1066,7 @@ cleanup:
        brelse(is.iloc.bh);
        brelse(bs.bh);
        if (no_expand == 0)
-               EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND;
+               ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
        up_write(&EXT4_I(inode)->xattr_sem);
        return error;
}
@@ -1195,9 +1194,8 @@ retry:
                if (!bh)
                        goto cleanup;
                if (ext4_xattr_check_block(bh)) {
-                       ext4_error(inode->i_sb, __func__,
-                                  "inode %lu: bad block %llu", inode->i_ino,
-                                  EXT4_I(inode)->i_file_acl);
+                       ext4_error(inode->i_sb, "inode %lu: bad block %llu",
+                                  inode->i_ino, EXT4_I(inode)->i_file_acl);
                        error = -EIO;
                        goto cleanup;
                }
@@ -1302,6 +1300,8 @@ retry:
        /* Remove the chosen entry from the inode */
        error = ext4_xattr_ibody_set(handle, inode, &i, is);
        if (error)
                goto cleanup;
        entry = IFIRST(header);
        if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
@@ -1372,16 +1372,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
                goto cleanup;
        bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
        if (!bh) {
-               ext4_error(inode->i_sb, __func__,
-                          "inode %lu: block %llu read error", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               ext4_error(inode->i_sb, "inode %lu: block %llu read error",
+                          inode->i_ino, EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
            BHDR(bh)->h_blocks != cpu_to_le32(1)) {
-               ext4_error(inode->i_sb, __func__,
-                          "inode %lu: bad block %llu", inode->i_ino,
-                          EXT4_I(inode)->i_file_acl);
+               ext4_error(inode->i_sb, "inode %lu: bad block %llu",
+                          inode->i_ino, EXT4_I(inode)->i_file_acl);
                goto cleanup;
        }
        ext4_xattr_release_block(handle, inode, bh);
@@ -1506,7 +1504,7 @@ again:
                }
                bh = sb_bread(inode->i_sb, ce->e_block);
                if (!bh) {
-                       ext4_error(inode->i_sb, __func__,
+                       ext4_error(inode->i_sb,
                                   "inode %lu: block %lu read error",
                                   inode->i_ino, (unsigned long) ce->e_block);
                } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
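The edits in this file follow two patterns: ext4_error() drops its __func__ argument (the ext4_error() macro now supplies the calling function's name itself), and open-coded bit tests on EXT4_I(inode)->i_state give way to accessor helpers. A minimal sketch of that accessor pattern, assuming helpers of the shape the new call sites imply (the real definitions live in fs/ext4/ext4.h and are not part of the hunks shown here):

/* Sketch only: assumes i_state is an unsigned long flags word, as the
 * atomic bitops require.  Using test_bit/set_bit/clear_bit makes the
 * flag updates safe without extra locking at the call sites. */
static inline int ext4_test_inode_state(struct inode *inode, int bit)
{
        return test_bit(bit, &EXT4_I(inode)->i_state);
}

static inline void ext4_set_inode_state(struct inode *inode, int bit)
{
        set_bit(bit, &EXT4_I(inode)->i_state);
}

static inline void ext4_clear_inode_state(struct inode *inode, int bit)
{
        clear_bit(bit, &EXT4_I(inode)->i_state);
}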

View File

@@ -507,6 +507,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
        if (blocknr < journal->j_tail)
                freed = freed + journal->j_last - journal->j_first;

        trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
        jbd_debug(1,
                  "Cleaning journal tail from %d to %d (offset %lu), "
                  "freeing %lu\n",

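The wraparound arithmetic above deserves a note: the journal is a circular log occupying blocks [j_first, j_last), so if the new tail block number is lower than the old one the log has wrapped, and the journal's size must be added back to get the number of blocks actually freed. A self-contained sketch of the same computation (the names merely mirror the journal fields; this is not kernel code):

#include <stdio.h>

/* Model of the freed-block math in jbd2_cleanup_journal_tail(): the
 * tail advances from old_tail to new_tail inside the circular range
 * [first, last). */
static unsigned long blocks_freed(unsigned long old_tail,
                                  unsigned long new_tail,
                                  unsigned long first,
                                  unsigned long last)
{
        unsigned long freed = new_tail - old_tail;

        if (new_tail < old_tail)        /* the log wrapped past 'last' */
                freed += last - first;
        return freed;
}

int main(void)
{
        /* Journal spans blocks [2, 1026); tail moves 1020 -> 10. */
        printf("freed %lu blocks\n", blocks_freed(1020, 10, 2, 1026));
        return 0;
}

This prints "freed 14 blocks": the unsigned subtraction deliberately underflows in the wrapped case, and adding the journal size (1024) brings it back into range, exactly as in the kernel function.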
View File

@@ -883,8 +883,7 @@ restart_loop:
                spin_unlock(&journal->j_list_lock);
                bh = jh2bh(jh);
                jbd_lock_bh_state(bh);
-               J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
-                       jh->b_transaction == journal->j_running_transaction);
+               J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);

                /*
                 * If there is undo-protected committed data against
@@ -930,12 +929,12 @@ restart_loop:
                /* A buffer which has been freed while still being
                 * journaled by a previous transaction may end up still
                 * being dirty here, but we want to avoid writing back
-                * that buffer in the future now that the last use has
-                * been committed.  That's not only a performance gain,
-                * it also stops aliasing problems if the buffer is left
-                * behind for writeback and gets reallocated for another
-                * use in a different page. */
-               if (buffer_freed(bh)) {
+                * that buffer in the future after the "add to orphan"
+                * operation has been committed.  That's not only a
+                * performance gain, it also stops aliasing problems if
+                * the buffer is left behind for writeback and gets
+                * reallocated for another use in a different page. */
+               if (buffer_freed(bh) && !jh->b_next_transaction) {
                        clear_buffer_freed(bh);
                        clear_buffer_jbddirty(bh);
                }

View File

@@ -39,6 +39,8 @@
#include <linux/seq_file.h>
#include <linux/math64.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/vmalloc.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
@@ -93,6 +95,7 @@ EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);

static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
static void __journal_abort_soft (journal_t *journal, int errno);
static int jbd2_journal_create_slab(size_t slab_size);
/*
 * Helper function used to manage commit timeouts
@@ -1248,6 +1251,13 @@ int jbd2_journal_load(journal_t *journal)
                }
        }
        /*
         * Create a slab for this blocksize
         */
        err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
        if (err)
                return err;
        /* Let the recovery code check whether it needs to recover any
         * data from the journal. */
        if (jbd2_journal_recover(journal))
@@ -1806,6 +1816,127 @@ size_t journal_tag_bytes(journal_t *journal)
        return JBD2_TAG_SIZE32;
}
/*
 * JBD memory management
 *
 * These functions are used to allocate block-sized chunks of memory
 * used for making copies of buffer_head data.  Very often it will be
 * page-sized chunks of data, but sometimes it will be in
 * sub-page-size chunks.  (For example, 16k pages on Power systems
 * with a 4k block file system.)  For blocks smaller than a page, we
 * use a SLAB allocator.  There are slab caches for each block size,
 * which are allocated at mount time, if necessary, and we only free
 * (all of) the slab caches when/if the jbd2 module is unloaded.  For
 * this reason we don't need a mutex to protect access to jbd2_slab[]
 * when allocating or releasing memory; a mutex is only needed in
 * jbd2_journal_create_slab().
 */
#define JBD2_MAX_SLABS 8
static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
static DECLARE_MUTEX(jbd2_slab_create_sem);
static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
        "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
        "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
};
static void jbd2_journal_destroy_slabs(void)
{
        int i;

        for (i = 0; i < JBD2_MAX_SLABS; i++) {
                if (jbd2_slab[i])
                        kmem_cache_destroy(jbd2_slab[i]);
                jbd2_slab[i] = NULL;
        }
}
static int jbd2_journal_create_slab(size_t size)
{
        int i = order_base_2(size) - 10;
        size_t slab_size;

        if (size == PAGE_SIZE)
                return 0;

        if (i >= JBD2_MAX_SLABS)
                return -EINVAL;

        if (unlikely(i < 0))
                i = 0;
        down(&jbd2_slab_create_sem);
        if (jbd2_slab[i]) {
                up(&jbd2_slab_create_sem);
                return 0;       /* Already created */
        }

        slab_size = 1 << (i+10);
        jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
                                         slab_size, 0, NULL);
        up(&jbd2_slab_create_sem);
        if (!jbd2_slab[i]) {
                printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
                return -ENOMEM;
        }
        return 0;
}
static struct kmem_cache *get_slab(size_t size)
{
        int i = order_base_2(size) - 10;

        BUG_ON(i >= JBD2_MAX_SLABS);
        if (unlikely(i < 0))
                i = 0;
        BUG_ON(jbd2_slab[i] == 0);
        return jbd2_slab[i];
}
void *jbd2_alloc(size_t size, gfp_t flags)
{
        void *ptr;

        BUG_ON(size & (size-1)); /* Must be a power of 2 */

        flags |= __GFP_REPEAT;
        if (size == PAGE_SIZE)
                ptr = (void *)__get_free_pages(flags, 0);
        else if (size > PAGE_SIZE) {
                int order = get_order(size);

                if (order < 3)
                        ptr = (void *)__get_free_pages(flags, order);
                else
                        ptr = vmalloc(size);
        } else
                ptr = kmem_cache_alloc(get_slab(size), flags);

        /* Check alignment; SLUB has gotten this wrong in the past,
         * and this can lead to user data corruption! */
        BUG_ON(((unsigned long) ptr) & (size-1));

        return ptr;
}
void jbd2_free(void *ptr, size_t size)
{
        if (size == PAGE_SIZE) {
                free_pages((unsigned long)ptr, 0);
                return;
        }
        if (size > PAGE_SIZE) {
                int order = get_order(size);

                if (order < 3)
                        free_pages((unsigned long)ptr, order);
                else
                        vfree(ptr);
                return;
        }
        kmem_cache_free(get_slab(size), ptr);
};
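The index arithmetic shared by jbd2_journal_create_slab() and get_slab() maps block sizes of 1k through 128k onto jbd2_slab[0..7] via order_base_2(size) - 10, and jbd2_alloc() insists on power-of-two sizes so that its final alignment check is meaningful. A small userspace sketch of that mapping (order_base_2() is modeled locally; illustrative only):

#include <assert.h>
#include <stdio.h>

/* Userspace model of the kernel's order_base_2(): log2, rounded up. */
static int order_base_2(unsigned long n)
{
        int order = 0;

        while ((1UL << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned long size;

        /* Same index math as jbd2_journal_create_slab()/get_slab(). */
        for (size = 1024; size <= 131072; size <<= 1) {
                assert((size & (size - 1)) == 0); /* power-of-two only */
                printf("block %6lu -> jbd2_slab[%d]\n",
                       size, order_base_2(size) - 10);
        }
        return 0;
}

Running it walks the indices 0 through 7, matching the eight jbd2_slab_names entries above; PAGE_SIZE requests never reach the slabs because both jbd2_journal_create_slab() and jbd2_alloc() special-case them first.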
/*
 * Journal_head storage management
 */
@@ -2204,6 +2335,7 @@ static void jbd2_journal_destroy_caches(void)
        jbd2_journal_destroy_revoke_caches();
        jbd2_journal_destroy_jbd2_journal_head_cache();
        jbd2_journal_destroy_handle_cache();
        jbd2_journal_destroy_slabs();
}

static int __init journal_init(void)

View File

@@ -1727,6 +1727,21 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
        if (!jh)
                goto zap_buffer_no_jh;

        /*
         * We cannot remove the buffer from checkpoint lists until the
         * transaction adding the inode to the orphan list (call it T)
         * is committed.  Otherwise, if the transaction changing the
         * buffer were cleaned from the journal before T is committed,
         * a crash could lose the correct contents of the buffer.  On
         * the other hand, we have to clear the buffer's dirty bit no
         * later than the moment the transaction marking the buffer as
         * freed in the filesystem structures is committed, because
         * from that moment on the buffer can be reallocated and used
         * by a different page.  Since the block hasn't been freed yet
         * but the inode has already been added to the orphan list, it
         * is safe for us to add the buffer to the BJ_Forget list of
         * the newest transaction.
         */
        transaction = jh->b_transaction;
        if (transaction == NULL) {
                /* First case: not on any transaction.  If it
@@ -1783,16 +1798,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
        } else if (transaction == journal->j_committing_transaction) {
                JBUFFER_TRACE(jh, "on committing transaction");
                /*
-                * If it is committing, we simply cannot touch it. We
-                * can remove it's next_transaction pointer from the
-                * running transaction if that is set, but nothing
-                * else. */
+                * The buffer is committing, so we simply cannot touch
+                * it.  We just set b_next_transaction to the running
+                * transaction (if there is one) and mark the buffer
+                * as freed so that the commit code knows it should
+                * clear dirty bits when it is done with the buffer.
+                */
                set_buffer_freed(bh);
-               if (jh->b_next_transaction) {
-                       J_ASSERT(jh->b_next_transaction ==
-                                journal->j_running_transaction);
-                       jh->b_next_transaction = NULL;
-               }
+               if (journal->j_running_transaction && buffer_jbddirty(bh))
+                       jh->b_next_transaction = journal->j_running_transaction;
                jbd2_journal_put_journal_head(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
@@ -1969,7 +1983,7 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
 */
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
-       int was_dirty;
+       int was_dirty, jlist;
        struct buffer_head *bh = jh2bh(jh);

        J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
@@ -1991,8 +2005,13 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
        __jbd2_journal_temp_unlink_buffer(jh);
        jh->b_transaction = jh->b_next_transaction;
        jh->b_next_transaction = NULL;
-       __jbd2_journal_file_buffer(jh, jh->b_transaction,
-                       jh->b_modified ? BJ_Metadata : BJ_Reserved);
+       if (buffer_freed(bh))
+               jlist = BJ_Forget;
+       else if (jh->b_modified)
+               jlist = BJ_Metadata;
+       else
+               jlist = BJ_Reserved;
+       __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
        J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

        if (was_dirty)

View File

@@ -69,15 +69,8 @@ extern u8 jbd2_journal_enable_debug;
#define jbd_debug(f, a...)      /**/
#endif

-static inline void *jbd2_alloc(size_t size, gfp_t flags)
-{
-       return (void *)__get_free_pages(flags, get_order(size));
-}
-
-static inline void jbd2_free(void *ptr, size_t size)
-{
-       free_pages((unsigned long)ptr, get_order(size));
-};
+extern void *jbd2_alloc(size_t size, gfp_t flags);
+extern void jbd2_free(void *ptr, size_t size);

#define JBD2_MIN_JOURNAL_BLOCKS 1024
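The inline versions removed here rounded every request up to whole pages: get_order() returns the smallest page order that fits, so on a 4k-page machine a 1k journal buffer consumed an entire page, and on a 16k-page system a 4k block wasted 12k, which is exactly the case the new slab code in fs/jbd2/journal.c is built for. A userspace sketch of the old rounding behaviour (get_order() modeled locally under an assumed 4k page size):

#include <stdio.h>

#define MODEL_PAGE_SIZE 4096UL  /* assumed page size for this sketch */

/* Model of the kernel's get_order(): smallest n such that
 * PAGE_SIZE << n >= size. */
static int model_get_order(unsigned long size)
{
        int order = 0;

        while ((MODEL_PAGE_SIZE << order) < size)
                order++;
        return order;
}

int main(void)
{
        unsigned long bs;

        for (bs = 1024; bs <= 8192; bs <<= 1) {
                unsigned long alloc = MODEL_PAGE_SIZE << model_get_order(bs);

                printf("block %4lu -> allocated %5lu (wasted %4lu)\n",
                       bs, alloc, alloc - bs);
        }
        return 0;
}

The out-of-line jbd2_alloc() avoids this for sub-page sizes: the per-size slab caches hand back block-aligned objects, which its final alignment BUG_ON() then verifies.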

View File

@@ -874,6 +874,107 @@ TRACE_EVENT(ext4_forget,
                  __entry->mode, __entry->is_metadata, __entry->block)
);
TRACE_EVENT(ext4_da_update_reserve_space,
TP_PROTO(struct inode *inode, int used_blocks),
TP_ARGS(inode, used_blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
__field( int, used_blocks )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( int, allocated_meta_blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->used_blocks = used_blocks;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->used_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
);
TRACE_EVENT(ext4_da_reserve_space,
TP_PROTO(struct inode *inode, int md_needed),
TP_ARGS(inode, md_needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
__field( int, md_needed )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->md_needed = md_needed;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks)
);
TRACE_EVENT(ext4_da_release_space,
TP_PROTO(struct inode *inode, int freed_blocks),
TP_ARGS(inode, freed_blocks),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
__field( int, freed_blocks )
__field( int, reserved_data_blocks )
__field( int, reserved_meta_blocks )
__field( int, allocated_meta_blocks )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->freed_blocks = freed_blocks;
__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->freed_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
);
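Each TRACE_EVENT above compiles into a tracepoint exposed under events/ext4/ in the kernel's tracing debugfs tree, with the TP_printk() template dictating how a hit is rendered in the trace buffer. As a purely hypothetical illustration of the ext4_da_reserve_space output (all values below are invented; only the field layout follows the TP_printk string):

ext4_da_reserve_space: dev sda1 ino 12 mode 0100644 i_blocks 16 md_needed 1 reserved_data_blocks 2 reserved_meta_blocks 1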
#endif /* _TRACE_EXT4_H */

/* This part must be outside protection */

View File

@@ -199,6 +199,34 @@ TRACE_EVENT(jbd2_checkpoint_stats,
                  __entry->forced_to_close, __entry->written, __entry->dropped)
);
TRACE_EVENT(jbd2_cleanup_journal_tail,
TP_PROTO(journal_t *journal, tid_t first_tid,
unsigned long block_nr, unsigned long freed),
TP_ARGS(journal, first_tid, block_nr, freed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( tid_t, tail_sequence )
__field( tid_t, first_tid )
__field(unsigned long, block_nr )
__field(unsigned long, freed )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->tail_sequence = journal->j_tail_sequence;
__entry->first_tid = first_tid;
__entry->block_nr = block_nr;
__entry->freed = freed;
),
TP_printk("dev %s from %u to %u offset %lu freed %lu",
jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
__entry->first_tid, __entry->block_nr, __entry->freed)
);
#endif /* _TRACE_JBD2_H */

/* This part must be outside protection */