ext4: fix lost truncate due to race with writeback
The following race can lead to a loss of i_disksize update from truncate
thus resulting in a wrong inode size if the inode size isn't updated
again before inode is reclaimed:

ext4_setattr()                                mpage_map_and_submit_extent()
  EXT4_I(inode)->i_disksize = attr->ia_size;
  ...                                           ...
                                                disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
                                                /* False because i_size isn't
                                                 * updated yet */
                                                if (disksize > i_size_read(inode))
                                                /* True, because i_disksize is
                                                 * already truncated */
                                                if (disksize > EXT4_I(inode)->i_disksize)
                                                  /* Overwrite i_disksize
                                                   * update from truncate */
                                                  ext4_update_i_disksize()
  i_size_write(inode, attr->ia_size);

For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.

We fix the race by doing both i_disksize and i_size update in truncate
atomically under i_data_sem and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
This commit is contained in:
@@ -2432,16 +2432,32 @@ do { \
|
|||||||
#define EXT4_FREECLUSTERS_WATERMARK 0
|
#define EXT4_FREECLUSTERS_WATERMARK 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Update i_disksize. Requires i_mutex to avoid races with truncate */
|
||||||
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
|
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
|
||||||
{
|
{
|
||||||
/*
|
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
|
||||||
* XXX: replace with spinlock if seen contended -bzzz
|
!mutex_is_locked(&inode->i_mutex));
|
||||||
*/
|
|
||||||
down_write(&EXT4_I(inode)->i_data_sem);
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
if (newsize > EXT4_I(inode)->i_disksize)
|
if (newsize > EXT4_I(inode)->i_disksize)
|
||||||
EXT4_I(inode)->i_disksize = newsize;
|
EXT4_I(inode)->i_disksize = newsize;
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
return ;
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Update i_disksize after writeback has been started. Races with truncate
|
||||||
|
* are avoided by checking i_size under i_data_sem.
|
||||||
|
*/
|
||||||
|
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
|
||||||
|
{
|
||||||
|
loff_t i_size;
|
||||||
|
|
||||||
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
|
i_size = i_size_read(inode);
|
||||||
|
if (newsize > i_size)
|
||||||
|
newsize = i_size;
|
||||||
|
if (newsize > EXT4_I(inode)->i_disksize)
|
||||||
|
EXT4_I(inode)->i_disksize = newsize;
|
||||||
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ext4_group_info {
|
struct ext4_group_info {
|
||||||
|
|||||||
@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
|
|||||||
|
|
||||||
/* Update on-disk size after IO is submitted */
|
/* Update on-disk size after IO is submitted */
|
||||||
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
|
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
|
||||||
if (disksize > i_size_read(inode))
|
|
||||||
disksize = i_size_read(inode);
|
|
||||||
if (disksize > EXT4_I(inode)->i_disksize) {
|
if (disksize > EXT4_I(inode)->i_disksize) {
|
||||||
int err2;
|
int err2;
|
||||||
|
|
||||||
ext4_update_i_disksize(inode, disksize);
|
ext4_wb_update_i_disksize(inode, disksize);
|
||||||
err2 = ext4_mark_inode_dirty(handle, inode);
|
err2 = ext4_mark_inode_dirty(handle, inode);
|
||||||
if (err2)
|
if (err2)
|
||||||
ext4_error(inode->i_sb,
|
ext4_error(inode->i_sb,
|
||||||
@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
|||||||
error = ext4_orphan_add(handle, inode);
|
error = ext4_orphan_add(handle, inode);
|
||||||
orphan = 1;
|
orphan = 1;
|
||||||
}
|
}
|
||||||
|
down_write(&EXT4_I(inode)->i_data_sem);
|
||||||
EXT4_I(inode)->i_disksize = attr->ia_size;
|
EXT4_I(inode)->i_disksize = attr->ia_size;
|
||||||
rc = ext4_mark_inode_dirty(handle, inode);
|
rc = ext4_mark_inode_dirty(handle, inode);
|
||||||
if (!error)
|
if (!error)
|
||||||
error = rc;
|
error = rc;
|
||||||
|
/*
|
||||||
|
* We have to update i_size under i_data_sem together
|
||||||
|
* with i_disksize to avoid races with writeback code
|
||||||
|
* running ext4_wb_update_i_disksize().
|
||||||
|
*/
|
||||||
|
if (!error)
|
||||||
|
i_size_write(inode, attr->ia_size);
|
||||||
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
ext4_journal_stop(handle);
|
ext4_journal_stop(handle);
|
||||||
if (error) {
|
if (error) {
|
||||||
ext4_orphan_del(NULL, inode);
|
ext4_orphan_del(NULL, inode);
|
||||||
goto err_out;
|
goto err_out;
|
||||||
}
|
}
|
||||||
}
|
} else
|
||||||
|
i_size_write(inode, attr->ia_size);
|
||||||
|
|
||||||
i_size_write(inode, attr->ia_size);
|
|
||||||
/*
|
/*
|
||||||
* Blocks are going to be removed from the inode. Wait
|
* Blocks are going to be removed from the inode. Wait
|
||||||
* for dio in flight. Temporarily disable
|
* for dio in flight. Temporarily disable
|
||||||
|
|||||||
Reference in New Issue
Block a user