mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 12:42:02 +00:00
zonefs fixes for 6.8.0-rc5
- Fix direct write error handling to avoid a race between failed IO completion and the submission path itself which can result in an invalid file size exposed to the user after the failed IO. -----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQSRPv8tYSvhwAzJdzjdoc3SxdoYdgUCZc9S/AAKCRDdoc3SxdoY dgilAQDhQeRxzZLXO5lh5LGeqveo88kXuQclCK9VeqnCr0cnHQD/RTXvo464Vf4c DAuDtLxRA16sj8WlLkUVkvjMKdjYaQ8= =n1Tp -----END PGP SIGNATURE----- Merge tag 'zonefs-6.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs Pull zonefs fix from Damien Le Moal: - Fix direct write error handling to avoid a race between failed IO completion and the submission path itself which can result in an invalid file size exposed to the user after the failed IO. * tag 'zonefs-6.8-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs: zonefs: Improve error handling
This commit is contained in:
commit
efb0b63afc
@ -348,6 +348,11 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
|
||||
struct zonefs_inode_info *zi = ZONEFS_I(inode);
|
||||
|
||||
if (error) {
|
||||
/*
|
||||
* For Sync IOs, error recovery is called from
|
||||
* zonefs_file_dio_write().
|
||||
*/
|
||||
if (!is_sync_kiocb(iocb))
|
||||
zonefs_io_error(inode, true);
|
||||
return error;
|
||||
}
|
||||
@ -491,6 +496,14 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
ret = -EINVAL;
|
||||
goto inode_unlock;
|
||||
}
|
||||
/*
|
||||
* Advance the zone write pointer offset. This assumes that the
|
||||
* IO will succeed, which is OK to do because we do not allow
|
||||
* partial writes (IOMAP_DIO_PARTIAL is not set) and if the IO
|
||||
* fails, the error path will correct the write pointer offset.
|
||||
*/
|
||||
z->z_wpoffset += count;
|
||||
zonefs_inode_account_active(inode);
|
||||
mutex_unlock(&zi->i_truncate_mutex);
|
||||
}
|
||||
|
||||
@ -504,20 +517,19 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
if (ret == -ENOTBLK)
|
||||
ret = -EBUSY;
|
||||
|
||||
if (zonefs_zone_is_seq(z) &&
|
||||
(ret > 0 || ret == -EIOCBQUEUED)) {
|
||||
if (ret > 0)
|
||||
count = ret;
|
||||
|
||||
/*
|
||||
* Update the zone write pointer offset assuming the write
|
||||
* operation succeeded. If it did not, the error recovery path
|
||||
* will correct it. Also do active seq file accounting.
|
||||
* For a failed IO or partial completion, trigger error recovery
|
||||
* to update the zone write pointer offset to a correct value.
|
||||
* For asynchronous IOs, zonefs_file_write_dio_end_io() may already
|
||||
* have executed error recovery if the IO already completed when we
|
||||
* reach here. However, we cannot know that, so we execute error recovery
|
||||
* again (which will not change anything).
|
||||
*/
|
||||
mutex_lock(&zi->i_truncate_mutex);
|
||||
z->z_wpoffset += count;
|
||||
zonefs_inode_account_active(inode);
|
||||
mutex_unlock(&zi->i_truncate_mutex);
|
||||
if (zonefs_zone_is_seq(z)) {
|
||||
if (ret > 0 && ret != count)
|
||||
ret = -EIO;
|
||||
if (ret < 0 && ret != -EIOCBQUEUED)
|
||||
zonefs_io_error(inode, true);
|
||||
}
|
||||
|
||||
inode_unlock:
|
||||
|
@ -246,16 +246,18 @@ static void zonefs_inode_update_mode(struct inode *inode)
|
||||
z->z_mode = inode->i_mode;
|
||||
}
|
||||
|
||||
struct zonefs_ioerr_data {
|
||||
struct inode *inode;
|
||||
bool write;
|
||||
};
|
||||
|
||||
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
struct zonefs_ioerr_data *err = data;
|
||||
struct inode *inode = err->inode;
|
||||
struct blk_zone *z = data;
|
||||
|
||||
*z = *zone;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
|
||||
bool write)
|
||||
{
|
||||
struct zonefs_zone *z = zonefs_inode_zone(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
|
||||
@ -270,8 +272,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
|
||||
data_size = zonefs_check_zone_condition(sb, z, zone);
|
||||
isize = i_size_read(inode);
|
||||
if (!(z->z_flags & (ZONEFS_ZONE_READONLY | ZONEFS_ZONE_OFFLINE)) &&
|
||||
!err->write && isize == data_size)
|
||||
return 0;
|
||||
!write && isize == data_size)
|
||||
return;
|
||||
|
||||
/*
|
||||
* At this point, we detected either a bad zone or an inconsistency
|
||||
@ -292,7 +294,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
|
||||
* In all cases, warn about inode size inconsistency and handle the
|
||||
* IO error according to the zone condition and to the mount options.
|
||||
*/
|
||||
if (zonefs_zone_is_seq(z) && isize != data_size)
|
||||
if (isize != data_size)
|
||||
zonefs_warn(sb,
|
||||
"inode %lu: invalid size %lld (should be %lld)\n",
|
||||
inode->i_ino, isize, data_size);
|
||||
@ -352,8 +354,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
|
||||
zonefs_i_size_write(inode, data_size);
|
||||
z->z_wpoffset = data_size;
|
||||
zonefs_inode_account_active(inode);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -367,23 +367,25 @@ void __zonefs_io_error(struct inode *inode, bool write)
|
||||
{
|
||||
struct zonefs_zone *z = zonefs_inode_zone(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
|
||||
unsigned int noio_flag;
|
||||
unsigned int nr_zones = 1;
|
||||
struct zonefs_ioerr_data err = {
|
||||
.inode = inode,
|
||||
.write = write,
|
||||
};
|
||||
struct blk_zone zone;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* The only files that have more than one zone are conventional zone
|
||||
* files with aggregated conventional zones, for which the inode zone
|
||||
* size is always larger than the device zone size.
|
||||
* Conventional zones have no write pointer and cannot become read-only
|
||||
* or offline. So simply fake a report for a single or aggregated zone
|
||||
* and let zonefs_handle_io_error() correct the zone inode information
|
||||
* according to the mount options.
|
||||
*/
|
||||
if (z->z_size > bdev_zone_sectors(sb->s_bdev))
|
||||
nr_zones = z->z_size >>
|
||||
(sbi->s_zone_sectors_shift + SECTOR_SHIFT);
|
||||
if (!zonefs_zone_is_seq(z)) {
|
||||
zone.start = z->z_sector;
|
||||
zone.len = z->z_size >> SECTOR_SHIFT;
|
||||
zone.wp = zone.start + zone.len;
|
||||
zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
|
||||
zone.cond = BLK_ZONE_COND_NOT_WP;
|
||||
zone.capacity = zone.len;
|
||||
goto handle_io_error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Memory allocations in blkdev_report_zones() can trigger a memory
|
||||
@ -394,12 +396,20 @@ void __zonefs_io_error(struct inode *inode, bool write)
|
||||
* the GFP_NOIO context avoids both problems.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = blkdev_report_zones(sb->s_bdev, z->z_sector, nr_zones,
|
||||
zonefs_io_error_cb, &err);
|
||||
if (ret != nr_zones)
|
||||
ret = blkdev_report_zones(sb->s_bdev, z->z_sector, 1,
|
||||
zonefs_io_error_cb, &zone);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
if (ret != 1) {
|
||||
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
|
||||
inode->i_ino, ret);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
zonefs_warn(sb, "remounting filesystem read-only\n");
|
||||
sb->s_flags |= SB_RDONLY;
|
||||
return;
|
||||
}
|
||||
|
||||
handle_io_error:
|
||||
zonefs_handle_io_error(inode, &zone, write);
|
||||
}
|
||||
|
||||
static struct kmem_cache *zonefs_inode_cachep;
|
||||
|
Loading…
Reference in New Issue
Block a user