vfs-6.13.untorn.writes
-----BEGIN PGP SIGNATURE-----

iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZzcopwAKCRCRxhvAZXjc
oitWAQD68PGFI6/ES9x+qGsDFEZBH08icuO+a9dyaZXyNRosDgD/ex2zHj6F7IzS
Ghgb9jiqWQ8l2+PDYfisxa/0jiqCbAk=
=DmXf
-----END PGP SIGNATURE-----

Merge tag 'vfs-6.13.untorn.writes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs untorn write support from Christian Brauner:
 "An atomic write is a write issued with torn-write protection. This
  means that, in the event of a power failure or any hardware failure,
  all or none of the data from the write will be stored, never a mix
  of old and new data.

  This work is already supported for block devices. If a block device
  is opened with O_DIRECT and the block device supports atomic write,
  then FMODE_CAN_ATOMIC_WRITE is set on the file of the opened block
  device.

  This contains the work to expand atomic write support to
  filesystems, specifically ext4 and XFS. Currently, only support for
  writing exactly one filesystem block atomically is added.

  Since it's now possible to have filesystem block size > page size
  for XFS, it's possible to write 4K+ blocks atomically on x86"

* tag 'vfs-6.13.untorn.writes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  iomap: drop an obsolete comment in iomap_dio_bio_iter
  ext4: Do not fallback to buffered-io for DIO atomic write
  ext4: Support setting FMODE_CAN_ATOMIC_WRITE
  ext4: Check for atomic writes support in write iter
  ext4: Add statx support for atomic writes
  xfs: Support setting FMODE_CAN_ATOMIC_WRITE
  xfs: Validate atomic writes
  xfs: Support atomic write for statx
  fs: iomap: Atomic write support
  fs: Export generic_atomic_write_valid()
  block: Add bdev atomic write limits helpers
  fs/block: Check for IOCB_DIRECT in generic_atomic_write_valid()
  block/fs: Pass an iocb to generic_atomic_write_valid()
commit 241c7ed4d4
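For context, a minimal userspace sketch of how this support is consumed, assuming a kernel and headers new enough to define RWF_ATOMIC, STATX_WRITE_ATOMIC and the stx_atomic_write_unit_* fields; the program is illustrative, not part of this merge:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>     /* statx(), STATX_WRITE_ATOMIC */
#include <sys/uio.h>      /* pwritev2() */
#include <unistd.h>
#include <linux/fs.h>     /* RWF_ATOMIC */

int main(int argc, char **argv)
{
	struct statx stx;
	struct iovec iov;
	void *buf;
	int fd;

	/* Atomic writes are O_DIRECT-only for now. */
	fd = open(argv[1], O_WRONLY | O_DIRECT);
	if (fd < 0)
		return 1;

	/* Query the supported untorn write unit range. */
	if (statx(AT_FDCWD, argv[1], 0, STATX_WRITE_ATOMIC, &stx) < 0 ||
	    stx.stx_atomic_write_unit_max == 0) {
		fprintf(stderr, "no atomic write support\n");
		return 1;
	}

	/*
	 * One buffer, one iovec: the kernel requires a single-segment
	 * iterator (ITER_UBUF), a power-of-two length within the
	 * advertised range, and a length-aligned file offset.
	 */
	if (posix_memalign(&buf, stx.stx_atomic_write_unit_min,
			   stx.stx_atomic_write_unit_min))
		return 1;
	memset(buf, 0xab, stx.stx_atomic_write_unit_min);
	iov.iov_base = buf;
	iov.iov_len = stx.stx_atomic_write_unit_min;

	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) < 0)
		perror("pwritev2");

	close(fd);
	return 0;
}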
@@ -513,6 +513,21 @@ ``IOMAP_WRITE`` with any combination of the following enhancements:
   if the mapping is unwritten and the filesystem cannot handle zeroing
   the unaligned regions without exposing stale contents.
 
+* ``IOMAP_ATOMIC``: This write is being issued with torn-write
+  protection.
+  Only a single bio can be created for the write, and the write must
+  not be split into multiple I/O requests, i.e. flag REQ_ATOMIC must be
+  set.
+  The file range to write must be aligned to satisfy the requirements
+  of both the filesystem and the underlying block device's atomic
+  commit capabilities.
+  If filesystem metadata updates are required (e.g. unwritten extent
+  conversion or copy on write), all updates for the entire file range
+  must be committed atomically as well.
+  Only one space mapping is allowed per untorn write.
+  Untorn writes must be aligned to, and must not be longer than, a
+  single file block.
+
 Callers commonly hold ``i_rwsem`` in shared or exclusive mode before
 calling this function.
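To make the alignment rules above concrete, a worked example in C comment form, assuming a 4096-byte filesystem block (the values are illustrative):

/*
 * Assuming a 4096-byte filesystem block:
 *
 *   pos = 0,    len = 4096  ->  OK (covers file block 0 exactly)
 *   pos = 8192, len = 4096  ->  OK (covers file block 2 exactly)
 *   pos = 2048, len = 4096  ->  -EINVAL (offset not len-aligned,
 *                                straddles blocks 0 and 1)
 *   pos = 0,    len = 8192  ->  -EINVAL (longer than one file block)
 *   pos = 0,    len = 512   ->  -EINVAL (sub-block untorn writes are
 *                                not supported by iomap yet)
 */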
block/fops.c

@@ -35,13 +35,10 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
 	return opf;
 }
 
-static bool blkdev_dio_invalid(struct block_device *bdev, loff_t pos,
-		struct iov_iter *iter, bool is_atomic)
+static bool blkdev_dio_invalid(struct block_device *bdev, struct kiocb *iocb,
+		struct iov_iter *iter)
 {
-	if (is_atomic && !generic_atomic_write_valid(iter, pos))
-		return true;
-
-	return pos & (bdev_logical_block_size(bdev) - 1) ||
+	return iocb->ki_pos & (bdev_logical_block_size(bdev) - 1) ||
 		!bdev_iter_is_aligned(bdev, iter);
 }
 
@@ -368,13 +365,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
-	bool is_atomic = iocb->ki_flags & IOCB_ATOMIC;
 	unsigned int nr_pages;
 
 	if (!iov_iter_count(iter))
 		return 0;
 
-	if (blkdev_dio_invalid(bdev, iocb->ki_pos, iter, is_atomic))
+	if (blkdev_dio_invalid(bdev, iocb, iter))
 		return -EINVAL;
 
 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
@@ -383,7 +379,7 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return __blkdev_direct_IO_simple(iocb, iter, bdev,
 					nr_pages);
 		return __blkdev_direct_IO_async(iocb, iter, bdev, nr_pages);
-	} else if (is_atomic) {
+	} else if (iocb->ki_flags & IOCB_ATOMIC) {
 		return -EINVAL;
 	}
 	return __blkdev_direct_IO(iocb, iter, bdev, bio_max_segs(nr_pages));
@@ -625,7 +621,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
 	if (!bdev)
 		return -ENXIO;
 
-	if (bdev_can_atomic_write(bdev) && filp->f_flags & O_DIRECT)
+	if (bdev_can_atomic_write(bdev))
 		filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
 
 	ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
@@ -700,6 +696,12 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
 		return -EOPNOTSUPP;
 
+	if (iocb->ki_flags & IOCB_ATOMIC) {
+		ret = generic_atomic_write_valid(iocb, from);
+		if (ret)
+			return ret;
+	}
+
 	size -= iocb->ki_pos;
 	if (iov_iter_count(from) > size) {
 		shorted = iov_iter_count(from) - size;
fs/ext4/ext4.h

@@ -1729,6 +1729,10 @@ struct ext4_sb_info {
 	 */
 	struct work_struct s_sb_upd_work;
 
+	/* Atomic write unit values in bytes */
+	unsigned int s_awu_min;
+	unsigned int s_awu_max;
+
 	/* Ext4 fast commit sub transaction ID */
 	atomic_t s_fc_subtid;
 
@@ -3855,6 +3859,12 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
 	return buffer_uptodate(bh);
 }
 
+static inline bool ext4_inode_can_atomic_write(struct inode *inode)
+{
+
+	return S_ISREG(inode->i_mode) && EXT4_SB(inode->i_sb)->s_awu_min > 0;
+}
+
 extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
 				  loff_t pos, unsigned len,
 				  get_block_t *get_block);
fs/ext4/file.c

@@ -599,6 +599,13 @@ out:
 	ssize_t err;
 	loff_t endbyte;
 
+	/*
+	 * There is no support for atomic writes on buffered-io yet,
+	 * we should never fallback to buffered-io for DIO atomic
+	 * writes.
+	 */
+	WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC);
+
 	offset = iocb->ki_pos;
 	err = ext4_buffered_write_iter(iocb, from);
 	if (err < 0)
@@ -692,6 +699,20 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (IS_DAX(inode))
 		return ext4_dax_write_iter(iocb, from);
 #endif
 
+	if (iocb->ki_flags & IOCB_ATOMIC) {
+		size_t len = iov_iter_count(from);
+		int ret;
+
+		if (len < EXT4_SB(inode->i_sb)->s_awu_min ||
+		    len > EXT4_SB(inode->i_sb)->s_awu_max)
+			return -EINVAL;
+
+		ret = generic_atomic_write_valid(iocb, from);
+		if (ret)
+			return ret;
+	}
+
 	if (iocb->ki_flags & IOCB_DIRECT)
 		return ext4_dio_write_iter(iocb, from);
 	else
@@ -884,6 +905,9 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
 			return ret;
 	}
 
+	if (ext4_inode_can_atomic_write(inode))
+		filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+
 	filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
 	return dquot_file_open(inode, filp);
 }
|
@ -3444,17 +3444,34 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
|
||||||
|
{
|
||||||
|
/* must be a directio to fall back to buffered */
|
||||||
|
if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) !=
|
||||||
|
(IOMAP_WRITE | IOMAP_DIRECT))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* atomic writes are all-or-nothing */
|
||||||
|
if (flags & IOMAP_ATOMIC)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* can only try again if we wrote nothing */
|
||||||
|
return written == 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
|
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
|
||||||
ssize_t written, unsigned flags, struct iomap *iomap)
|
ssize_t written, unsigned flags, struct iomap *iomap)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Check to see whether an error occurred while writing out the data to
|
* Check to see whether an error occurred while writing out the data to
|
||||||
* the allocated blocks. If so, return the magic error code so that we
|
* the allocated blocks. If so, return the magic error code for
|
||||||
* fallback to buffered I/O and attempt to complete the remainder of
|
* non-atomic write so that we fallback to buffered I/O and attempt to
|
||||||
* the I/O. Any blocks that may have been allocated in preparation for
|
* complete the remainder of the I/O.
|
||||||
* the direct I/O will be reused during buffered I/O.
|
* For non-atomic writes, any blocks that may have been
|
||||||
|
* allocated in preparation for the direct I/O will be reused during
|
||||||
|
* buffered I/O. For atomic write, we never fallback to buffered-io.
|
||||||
*/
|
*/
|
||||||
if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0)
|
if (ext4_want_directio_fallback(flags, written))
|
||||||
return -ENOTBLK;
|
return -ENOTBLK;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -5578,6 +5595,18 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((request_mask & STATX_WRITE_ATOMIC) && S_ISREG(inode->i_mode)) {
|
||||||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||||
|
unsigned int awu_min = 0, awu_max = 0;
|
||||||
|
|
||||||
|
if (ext4_inode_can_atomic_write(inode)) {
|
||||||
|
awu_min = sbi->s_awu_min;
|
||||||
|
awu_max = sbi->s_awu_max;
|
||||||
|
}
|
||||||
|
|
||||||
|
generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
|
||||||
|
}
|
||||||
|
|
||||||
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
|
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
|
||||||
if (flags & EXT4_APPEND_FL)
|
if (flags & EXT4_APPEND_FL)
|
||||||
stat->attributes |= STATX_ATTR_APPEND;
|
stat->attributes |= STATX_ATTR_APPEND;
|
||||||
|
fs/ext4/super.c

@@ -4425,6 +4425,36 @@ static int ext4_handle_clustersize(struct super_block *sb)
 	return 0;
 }
 
+/*
+ * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
+ * @sb: super block
+ * TODO: Later add support for bigalloc
+ */
+static void ext4_atomic_write_init(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct block_device *bdev = sb->s_bdev;
+
+	if (!bdev_can_atomic_write(bdev))
+		return;
+
+	if (!ext4_has_feature_extents(sb))
+		return;
+
+	sbi->s_awu_min = max(sb->s_blocksize,
+			     bdev_atomic_write_unit_min_bytes(bdev));
+	sbi->s_awu_max = min(sb->s_blocksize,
+			     bdev_atomic_write_unit_max_bytes(bdev));
+	if (sbi->s_awu_min && sbi->s_awu_max &&
+	    sbi->s_awu_min <= sbi->s_awu_max) {
+		ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u",
+			 sbi->s_awu_min, sbi->s_awu_max);
+	} else {
+		sbi->s_awu_min = 0;
+		sbi->s_awu_max = 0;
+	}
+}
+
 static void ext4_fast_commit_init(struct super_block *sb)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -5336,6 +5366,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 
 	spin_lock_init(&sbi->s_bdev_wb_lock);
 
+	ext4_atomic_write_init(sb);
 	ext4_fast_commit_init(sb);
 
 	sb->s_root = NULL;
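A worked example of the clamping in ext4_atomic_write_init() above, with hypothetical device limits:

/*
 * Hypothetical device: awu_min = 512, awu_max = 65536; ext4 with a
 * 4096-byte block size:
 *
 *   s_awu_min = max(4096, 512)   = 4096
 *   s_awu_max = min(4096, 65536) = 4096
 *
 * i.e. ext4 currently advertises exactly one untorn write size: the
 * filesystem block. If the device range did not cover the block size
 * (say awu_max = 2048), s_awu_min (4096) would exceed s_awu_max
 * (2048) and the else-branch would zero both fields, disabling the
 * feature for this mount.
 */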
fs/iomap/direct-io.c

@@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
  * clearing the WRITE_THROUGH flag in the dio request.
  */
 static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
-		const struct iomap *iomap, bool use_fua)
+		const struct iomap *iomap, bool use_fua, bool atomic)
 {
 	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
 
@@ -283,6 +283,8 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
 		opflags |= REQ_FUA;
 	else
 		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
+	if (atomic)
+		opflags |= REQ_ATOMIC;
 
 	return opflags;
 }
@@ -293,7 +295,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	const struct iomap *iomap = &iter->iomap;
 	struct inode *inode = iter->inode;
 	unsigned int fs_block_size = i_blocksize(inode), pad;
-	loff_t length = iomap_length(iter);
+	const loff_t length = iomap_length(iter);
+	bool atomic = iter->flags & IOMAP_ATOMIC;
 	loff_t pos = iter->pos;
 	blk_opf_t bio_opf;
 	struct bio *bio;
@@ -303,6 +306,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	size_t copied = 0;
 	size_t orig_count;
 
+	if (atomic && length != fs_block_size)
+		return -EINVAL;
+
 	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
 	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
 		return -EINVAL;
@@ -377,12 +383,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		goto out;
 	}
 
-	/*
-	 * Set the operation flags early so that bio_iov_iter_get_pages
-	 * can set up the page vector appropriately for a ZONE_APPEND
-	 * operation.
-	 */
-	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua);
+	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
 
 	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
 	do {
@@ -415,6 +416,17 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		}
 
 		n = bio->bi_iter.bi_size;
+		if (WARN_ON_ONCE(atomic && n != length)) {
+			/*
+			 * This bio should have covered the complete length,
+			 * which it doesn't, so error. We may need to zero out
+			 * the tail (complete FS block), similar to when
+			 * bio_iov_iter_get_pages() returns an error, above.
+			 */
+			ret = -EINVAL;
+			bio_put(bio);
+			goto zero_tail;
+		}
 		if (dio->flags & IOMAP_DIO_WRITE) {
 			task_io_account_write(n);
 		} else {
@@ -598,6 +610,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		iomi.flags |= IOMAP_NOWAIT;
 
+	if (iocb->ki_flags & IOCB_ATOMIC)
+		iomi.flags |= IOMAP_ATOMIC;
+
 	if (iov_iter_rw(iter) == READ) {
 		/* reads can always complete inline */
 		dio->flags |= IOMAP_DIO_INLINE_COMP;
@@ -659,7 +674,17 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (ret != -EAGAIN) {
 			trace_iomap_dio_invalidate_fail(inode, iomi.pos,
 							iomi.len);
-			ret = -ENOTBLK;
+			if (iocb->ki_flags & IOCB_ATOMIC) {
+				/*
+				 * folio invalidation failed, maybe
+				 * this is transient, unlock and see if
+				 * the caller tries again.
+				 */
+				ret = -EAGAIN;
+			} else {
+				/* fall back to buffered write */
+				ret = -ENOTBLK;
+			}
 		}
 		goto out_free_dio;
 	}
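In sum, the iomap changes plumb one flag through three layers; a condensed view of the code above:

/*
 * RWF_ATOMIC (userspace, pwritev2)
 *   -> IOCB_ATOMIC on the kiocb (VFS)
 *     -> IOMAP_ATOMIC on the iomap_iter (__iomap_dio_rw)
 *       -> REQ_ATOMIC on the bio (iomap_dio_bio_opflags)
 *
 * With REQ_ATOMIC set, the block layer must submit the bio as a
 * single untorn unit; iomap enforces that exactly one bio covering
 * exactly one filesystem block is built, and fails with -EINVAL
 * (after a WARN) if the bio comes up short.
 */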
fs/iomap/trace.h

@@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
 	{ IOMAP_REPORT,		"REPORT" }, \
 	{ IOMAP_FAULT,		"FAULT" }, \
 	{ IOMAP_DIRECT,		"DIRECT" }, \
-	{ IOMAP_NOWAIT,		"NOWAIT" }
+	{ IOMAP_NOWAIT,		"NOWAIT" }, \
+	{ IOMAP_ATOMIC,		"ATOMIC" }
 
 #define IOMAP_F_FLAGS_STRINGS \
 	{ IOMAP_F_NEW,		"NEW" }, \
fs/read_write.c

@@ -1830,18 +1830,22 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
 	return 0;
 }
 
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos)
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter)
 {
 	size_t len = iov_iter_count(iter);
 
 	if (!iter_is_ubuf(iter))
-		return false;
+		return -EINVAL;
 
 	if (!is_power_of_2(len))
-		return false;
+		return -EINVAL;
 
-	if (!IS_ALIGNED(pos, len))
-		return false;
+	if (!IS_ALIGNED(iocb->ki_pos, len))
+		return -EINVAL;
 
-	return true;
+	if (!(iocb->ki_flags & IOCB_DIRECT))
+		return -EOPNOTSUPP;
+
+	return 0;
 }
+EXPORT_SYMBOL_GPL(generic_atomic_write_valid);
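Illustrative outcomes of the revised helper, assuming a kiocb built from pwritev2() (the values are hypothetical):

/*
 *   single iovec, len = 4096, ki_pos = 8192, O_DIRECT  ->  0
 *   two iovecs (iterator is not an ITER_UBUF)          ->  -EINVAL
 *   len = 3072 (not a power of two)                    ->  -EINVAL
 *   len = 4096, ki_pos = 2048 (pos not len-aligned)    ->  -EINVAL
 *   buffered I/O (IOCB_DIRECT clear)                   ->  -EOPNOTSUPP
 */

Returning an errno rather than a bool lets callers distinguish bad geometry (-EINVAL) from an unsupported I/O path (-EOPNOTSUPP), and the export makes the helper usable from filesystem modules.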
fs/xfs/xfs_buf.c

@@ -2115,6 +2115,13 @@ xfs_alloc_buftarg(
 	btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
 					    mp, ops);
 
+	if (bdev_can_atomic_write(btp->bt_bdev)) {
+		btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
+						btp->bt_bdev);
+		btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
+						btp->bt_bdev);
+	}
+
 	/*
 	 * When allocating the buftargs we have not yet read the super block and
 	 * thus don't know the file system sector size yet.
fs/xfs/xfs_buf.h

@@ -124,6 +124,10 @@ struct xfs_buftarg {
 	struct percpu_counter	bt_io_count;
 	struct ratelimit_state	bt_ioerror_rl;
 
+	/* Atomic write unit values */
+	unsigned int		bt_bdev_awu_min;
+	unsigned int		bt_bdev_awu_max;
+
 	/* built-in cache, if we're not using the perag one */
 	struct xfs_buf_cache	bt_cache[];
 };
fs/xfs/xfs_file.c

@@ -852,6 +852,20 @@ xfs_file_write_iter(
 	if (IS_DAX(inode))
 		return xfs_file_dax_write(iocb, from);
 
+	if (iocb->ki_flags & IOCB_ATOMIC) {
+		/*
+		 * Currently only atomic writing of a single FS block is
+		 * supported. It would be possible to atomic write smaller than
+		 * a FS block, but there is no requirement to support this.
+		 * Note that iomap also does not support this yet.
+		 */
+		if (ocount != ip->i_mount->m_sb.sb_blocksize)
+			return -EINVAL;
+		ret = generic_atomic_write_valid(iocb, from);
+		if (ret)
+			return ret;
+	}
+
 	if (iocb->ki_flags & IOCB_DIRECT) {
 		/*
 		 * Allow a directio write to fall back to a buffered
@@ -1239,6 +1253,8 @@ xfs_file_open(
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
 	file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
+	if (xfs_inode_can_atomicwrite(XFS_I(inode)))
+		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
 	return generic_file_open(inode, file);
 }
fs/xfs/xfs_inode.h

@@ -332,6 +332,21 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
 	(XFS_IS_REALTIME_INODE(ip) ? \
 		(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
 
+static inline bool
+xfs_inode_can_atomicwrite(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+
+	if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
+		return false;
+	if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
+		return false;
+
+	return true;
+}
+
 /*
  * In-core inode flags.
  */
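The gate above reads naturally with example numbers (hypothetical device limits):

/*
 * sb_blocksize = 4096, bt_bdev_awu_min = 512, bt_bdev_awu_max = 65536
 *   -> 512 <= 4096 <= 65536, atomic writes allowed.
 *
 * sb_blocksize = 4096, bt_bdev_awu_max = 2048
 *   -> blocksize > awu_max, atomic writes refused.
 *
 * Device without atomic write support: both awu fields stay zero
 * (xfs_alloc_buftarg() never fills them), so blocksize > awu_max
 * and the helper returns false.
 */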
fs/xfs/xfs_iops.c

@@ -570,6 +570,20 @@ xfs_stat_blksize(
 	return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
 }
 
+static void
+xfs_get_atomic_write_attr(
+	struct xfs_inode	*ip,
+	unsigned int		*unit_min,
+	unsigned int		*unit_max)
+{
+	if (!xfs_inode_can_atomicwrite(ip)) {
+		*unit_min = *unit_max = 0;
+		return;
+	}
+
+	*unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+}
+
 STATIC int
 xfs_vn_getattr(
 	struct mnt_idmap	*idmap,
@@ -639,6 +653,14 @@ xfs_vn_getattr(
 			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
 			stat->dio_offset_align = bdev_logical_block_size(bdev);
 		}
+		if (request_mask & STATX_WRITE_ATOMIC) {
+			unsigned int	unit_min, unit_max;
+
+			xfs_get_atomic_write_attr(ip, &unit_min,
+					&unit_max);
+			generic_fill_statx_atomic_writes(stat,
+					unit_min, unit_max);
+		}
 		fallthrough;
 	default:
 		stat->blksize = xfs_stat_blksize(ip);
include/linux/blkdev.h

@@ -1674,6 +1674,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
 	return true;
 }
 
+static inline unsigned int
+bdev_atomic_write_unit_min_bytes(struct block_device *bdev)
+{
+	if (!bdev_can_atomic_write(bdev))
+		return 0;
+	return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev));
+}
+
+static inline unsigned int
+bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
+{
+	if (!bdev_can_atomic_write(bdev))
+		return 0;
+	return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
+}
+
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
 #endif /* _LINUX_BLKDEV_H */
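A sketch of typical filesystem-side use of these helpers at mount time, mirroring the ext4 hunk above; example_init_awu is a hypothetical name, not kernel API:

/* Illustrative only: clamp the device range to the FS block size. */
static void example_init_awu(struct super_block *sb,
			     unsigned int *awu_min, unsigned int *awu_max)
{
	struct block_device *bdev = sb->s_bdev;

	/* Both helpers return 0 when the device lacks support. */
	*awu_min = max_t(unsigned int, sb->s_blocksize,
			 bdev_atomic_write_unit_min_bytes(bdev));
	*awu_max = min_t(unsigned int, sb->s_blocksize,
			 bdev_atomic_write_unit_max_bytes(bdev));

	/* An empty range means untorn writes cannot cover one block. */
	if (*awu_min > *awu_max)
		*awu_min = *awu_max = 0;
}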
include/linux/fs.h

@@ -3798,6 +3798,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
 	return !c;
 }
 
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
 
 #endif /* _LINUX_FS_H */
include/linux/iomap.h

@@ -178,6 +178,7 @@ struct iomap_folio_ops {
 #else
 #define IOMAP_DAX		0
 #endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC		(1 << 9)
 
 struct iomap_ops {
 	/*