From bd8e0ff956456ad9071dbb6c2ed7d33bd22fc216 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 17 Mar 2015 14:04:02 -0700 Subject: [PATCH 01/30] new helper: iov_iter_rw() Get either READ or WRITE out of iter->type. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- include/linux/uio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/uio.h b/include/linux/uio.h index 15f11fb9fff6..8b01e1c3c614 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -111,6 +111,14 @@ static inline bool iter_is_iovec(struct iov_iter *i) return !(i->type & (ITER_BVEC | ITER_KVEC)); } +/* + * Get one of READ or WRITE out of iter->type without any other flags OR'd in + * with it. + * + * The ?: is just for type safety. + */ +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) + /* * Cap the iov_iter by given limit; note that the second argument is * *not* the new size - it's upper limit for such. Passing it a value From 17f8c842d24ac054e4212c82b5bd6ae455a334f3 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:50 -0700 Subject: [PATCH 02/30] Remove rw from {,__,do_}blockdev_direct_IO() Most filesystems call through to these at some point, so we'll start here. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- fs/affs/file.c | 2 +- fs/block_dev.c | 5 ++--- fs/btrfs/inode.c | 8 ++++---- fs/direct-io.c | 39 ++++++++++++++++++--------------------- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 2 +- fs/ext4/indirect.c | 11 ++++++----- fs/ext4/inode.c | 2 +- fs/f2fs/data.c | 2 +- fs/fat/inode.c | 2 +- fs/gfs2/aops.c | 5 ++--- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 3 +-- fs/jfs/inode.c | 2 +- fs/nilfs2/inode.c | 3 +-- fs/ocfs2/aops.c | 16 +++++++--------- fs/reiserfs/inode.c | 2 +- fs/udf/inode.c | 2 +- fs/xfs/xfs_aops.c | 9 ++++----- include/linux/fs.h | 18 ++++++++++-------- 20 files changed, 65 insertions(+), 72 deletions(-) diff --git a/fs/affs/file.c b/fs/affs/file.c index 7c1a3d4c19c2..1edc0d4b40db 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -405,7 +405,7 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, return 0; } - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, affs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); if (ret < 0 && (rw & WRITE)) affs_write_failed(mapping, offset + count); return ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index b5e87896f517..bc23afd35fdb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -152,9 +152,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, - offset, blkdev_get_block, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset, + blkdev_get_block, NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 686331f22b15..e9a3ff8a85fd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8174,10 +8174,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, wakeup = false; } - ret = __blockdev_direct_IO(rw, iocb, inode, - BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iter, offset, btrfs_get_blocks_direct, NULL, - btrfs_submit_direct, flags); + ret = __blockdev_direct_IO(iocb, inode, + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, + iter, offset, btrfs_get_blocks_direct, NULL, + btrfs_submit_direct, flags); if (rw & WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) diff --git a/fs/direct-io.c b/fs/direct-io.c index 6fb00e3f1059..c3b560b24a46 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1093,10 +1093,10 @@ static inline int drop_refcount(struct dio *dio) * for the whole file. */ static inline ssize_t -do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags) { unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; @@ -1110,9 +1110,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); - if (rw & WRITE) - rw = WRITE_ODIRECT; - /* * Avoid references to bdev if not absolutely needed to give * the early prefetch in the caller enough time. @@ -1127,7 +1124,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, } /* watch out for a 0 len io from a tricksy fs */ - if (rw == READ && !iov_iter_count(iter)) + if (iov_iter_rw(iter) == READ && !iov_iter_count(iter)) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); @@ -1143,7 +1140,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, dio->flags = flags; if (dio->flags & DIO_LOCKING) { - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { struct address_space *mapping = iocb->ki_filp->f_mapping; @@ -1169,19 +1166,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (is_sync_kiocb(iocb)) dio->is_async = false; else if (!(dio->flags & DIO_ASYNC_EXTEND) && - (rw & WRITE) && end > i_size_read(inode)) + iov_iter_rw(iter) == WRITE && end > i_size_read(inode)) dio->is_async = false; else dio->is_async = true; dio->inode = inode; - dio->rw = rw; + dio->rw = iov_iter_rw(iter) == WRITE ? WRITE_ODIRECT : READ; /* * For AIO O_(D)SYNC writes we need to defer completions to a workqueue * so that we can call ->fsync. */ - if (dio->is_async && (rw & WRITE) && + if (dio->is_async && iov_iter_rw(iter) == WRITE && ((iocb->ki_filp->f_flags & O_DSYNC) || IS_SYNC(iocb->ki_filp->f_mapping->host))) { retval = dio_set_defer_completion(dio); @@ -1274,7 +1271,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * we can let i_mutex go now that its achieved its purpose * of protecting us from looking up uninitialized blocks. */ - if (rw == READ && (dio->flags & DIO_LOCKING)) + if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING)) mutex_unlock(&dio->inode->i_mutex); /* @@ -1286,7 +1283,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, */ BUG_ON(retval == -EIOCBQUEUED); if (dio->is_async && retval == 0 && dio->result && - (rw == READ || dio->result == count)) + (iov_iter_rw(iter) == READ || dio->result == count)) retval = -EIOCBQUEUED; else dio_await_completion(dio); @@ -1300,11 +1297,11 @@ out: return retval; } -ssize_t -__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags) +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags) { /* * The block device state is needed in the end to finally @@ -1318,8 +1315,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, - get_block, end_io, submit_io, flags); + return do_blockdev_direct_IO(iocb, inode, bdev, iter, offset, get_block, + end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index df9d6afbc5d5..3cbeb1b63acf 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -864,7 +864,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); if (ret < 0 && (rw & WRITE)) ext2_write_failed(mapping, offset + count); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index db07ffbe7c85..6fb376c8d938 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1856,7 +1856,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 740c7871c117..ae4ffc27abc6 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -693,9 +693,10 @@ retry: ret = dax_do_io(rw, iocb, inode, iter, offset, ext4_get_block, NULL, 0); else - ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iter, offset, - ext4_get_block, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, + inode->i_sb->s_bdev, iter, + offset, ext4_get_block, NULL, + NULL, 0); inode_dio_done(inode); } else { locked: @@ -703,8 +704,8 @@ locked: ret = dax_do_io(rw, iocb, inode, iter, offset, ext4_get_block, NULL, DIO_LOCKING); else - ret = blockdev_direct_IO(rw, iocb, inode, iter, - offset, ext4_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, + ext4_get_block); if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a3f451370bef..ec049c04b197 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3037,7 +3037,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); else - ret = __blockdev_direct_IO(rw, iocb, inode, + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block_func, ext4_end_io_dio, NULL, dio_flags); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 497f8515d205..e16adebcb9b6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1159,7 +1159,7 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (rw & WRITE) __allocate_data_blocks(inode, offset, count); - err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); + err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8521207de229..a1a39f571e78 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -274,7 +274,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); if (ret < 0 && (rw & WRITE)) fat_write_failed(mapping, offset + count); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index fe6634d25d1d..59983a18cab4 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1095,9 +1095,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, truncate_inode_pages_range(mapping, lstart, end); } - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - gfs2_get_block_direct, NULL, NULL, 0); + rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9337065bcc67..e92d175d1fd7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -133,7 +133,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 5f86cadb0542..2a98dc07c22c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -131,8 +131,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, - hfsplus_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, hfsplus_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 3197aed10614..c20f7883543f 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -339,7 +339,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, jfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index ab4987bc637f..3727b8caa46e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -318,8 +318,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iter, offset, - nilfs_get_block); + size = blockdev_direct_IO(iocb, inode, iter, offset, nilfs_get_block); /* * In case of error extending write may have instantiated a few diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index e1bf18c5d25e..68cb199fb2b6 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -738,10 +738,9 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, di_bh = NULL; } - written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + offset, ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); if (unlikely(written < 0)) { loff_t i_size = i_size_read(inode); @@ -844,11 +843,10 @@ static ssize_t ocfs2_direct_IO(int rw, return 0; if (rw == READ) - return __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, - iter, offset, - ocfs2_direct_IO_get_blocks, - ocfs2_dio_end_io, NULL, 0); + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, offset, + ocfs2_direct_IO_get_blocks, + ocfs2_dio_end_io, NULL, 0); else return ocfs2_direct_IO_write(iocb, iter, offset); } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 9312b7842e03..a51e9177b056 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3286,7 +3286,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + ret = blockdev_direct_IO(iocb, inode, iter, offset, reiserfs_get_blocks_direct_io); /* diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 9c1fbd23913d..3adf49c01c19 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -225,7 +225,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); + ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); if (unlikely(ret < 0 && (rw & WRITE))) udf_write_failed(mapping, offset + count); return ret; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4f8cdc59bc38..5ca504c66e85 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1504,14 +1504,13 @@ xfs_vm_direct_IO( struct block_device *bdev = xfs_find_bdev_for_inode(inode); if (rw & WRITE) { - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); } - return __blockdev_direct_IO(rw, iocb, inode, bdev, iter, - offset, xfs_get_blocks_direct, - NULL, NULL, 0); + return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, + xfs_get_blocks_direct, NULL, NULL, 0); } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index f1e3f65255a8..c67b6de8be33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2634,16 +2634,18 @@ enum { void dio_end_io(struct bio *bio, int error); -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, struct iov_iter *iter, loff_t offset, - get_block_t get_block, dio_iodone_t end_io, - dio_submit_t submit_io, int flags); +ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, struct iov_iter *iter, + loff_t offset, get_block_t get_block, + dio_iodone_t end_io, dio_submit_t submit_io, + int flags); -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct iov_iter *iter, loff_t offset, - get_block_t get_block) +static inline ssize_t blockdev_direct_IO(struct kiocb *iocb, + struct inode *inode, + struct iov_iter *iter, loff_t offset, + get_block_t get_block) { - return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter, + return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, get_block, NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES); } From a95cd6311512bd954e88684eb39373f7f4b0a984 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:51 -0700 Subject: [PATCH 03/30] Remove rw from dax_{do_,}io() And use iov_iter_rw() instead. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- fs/dax.c | 27 +++++++++++++-------------- fs/ext2/inode.c | 4 ++-- fs/ext4/indirect.c | 4 ++-- fs/ext4/inode.c | 2 +- include/linux/fs.h | 4 ++-- 5 files changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index ed1619ec6537..a27846946525 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -98,9 +98,9 @@ static bool buffer_size_valid(struct buffer_head *bh) return bh->b_state != 0; } -static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, - loff_t start, loff_t end, get_block_t get_block, - struct buffer_head *bh) +static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, + loff_t start, loff_t end, get_block_t get_block, + struct buffer_head *bh) { ssize_t retval = 0; loff_t pos = start; @@ -109,7 +109,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, void *addr; bool hole = false; - if (rw != WRITE) + if (iov_iter_rw(iter) != WRITE) end = min(end, i_size_read(inode)); while (pos < end) { @@ -124,7 +124,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size = PAGE_ALIGN(end - pos); bh->b_state = 0; retval = get_block(inode, block, bh, - rw == WRITE); + iov_iter_rw(iter) == WRITE); if (retval) break; if (!buffer_size_valid(bh)) @@ -137,7 +137,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, bh->b_size -= done; } - hole = (rw != WRITE) && !buffer_written(bh); + hole = iov_iter_rw(iter) != WRITE && !buffer_written(bh); if (hole) { addr = NULL; size = bh->b_size - first; @@ -154,7 +154,7 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, max = min(pos + size, end); } - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) len = copy_from_iter(addr, max - pos, iter); else if (!hole) len = copy_to_iter(addr, max - pos, iter); @@ -173,7 +173,6 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, /** * dax_do_io - Perform I/O to a DAX file - * @rw: READ to read or WRITE to write * @iocb: The control block for this I/O * @inode: The file which the I/O is directed at * @iter: The addresses to do I/O from or to @@ -189,9 +188,9 @@ static ssize_t dax_io(int rw, struct inode *inode, struct iov_iter *iter, * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O * is in progress. */ -ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, - struct iov_iter *iter, loff_t pos, - get_block_t get_block, dio_iodone_t end_io, int flags) +ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, + struct iov_iter *iter, loff_t pos, get_block_t get_block, + dio_iodone_t end_io, int flags) { struct buffer_head bh; ssize_t retval = -EINVAL; @@ -199,7 +198,7 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, memset(&bh, 0, sizeof(bh)); - if ((flags & DIO_LOCKING) && (rw == READ)) { + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) { struct address_space *mapping = inode->i_mapping; mutex_lock(&inode->i_mutex); retval = filemap_write_and_wait_range(mapping, pos, end - 1); @@ -212,9 +211,9 @@ ssize_t dax_do_io(int rw, struct kiocb *iocb, struct inode *inode, /* Protects against truncate */ atomic_inc(&inode->i_dio_count); - retval = dax_io(rw, inode, iter, pos, end, get_block, &bh); + retval = dax_io(inode, iter, pos, end, get_block, &bh); - if ((flags & DIO_LOCKING) && (rw == READ)) + if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if ((retval > 0) && end_io) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 3cbeb1b63acf..14e8d1752685 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -861,8 +861,8 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, ssize_t ret; if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, ext2_get_block, - NULL, DIO_LOCKING); + ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL, + DIO_LOCKING); else ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index ae4ffc27abc6..cd81d01da0b0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -690,7 +690,7 @@ retry: goto locked; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, 0); else ret = __blockdev_direct_IO(iocb, inode, @@ -701,7 +701,7 @@ retry: } else { locked: if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, + ret = dax_do_io(iocb, inode, iter, offset, ext4_get_block, NULL, DIO_LOCKING); else ret = blockdev_direct_IO(iocb, inode, iter, offset, diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ec049c04b197..76b8cba5d041 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3034,7 +3034,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } if (IS_DAX(inode)) - ret = dax_do_io(rw, iocb, inode, iter, offset, get_block_func, + ret = dax_do_io(iocb, inode, iter, offset, get_block_func, ext4_end_io_dio, dio_flags); else ret = __blockdev_direct_IO(iocb, inode, diff --git a/include/linux/fs.h b/include/linux/fs.h index c67b6de8be33..295bc589fe1b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2609,8 +2609,8 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset, extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -ssize_t dax_do_io(int rw, struct kiocb *, struct inode *, struct iov_iter *, - loff_t, get_block_t, dio_iodone_t, int flags); +ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t, + get_block_t, dio_iodone_t, int flags); int dax_clear_blocks(struct inode *, sector_t block, long size); int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t); int dax_truncate_page(struct inode *, loff_t from, get_block_t); From 6f67376318abea58589ebe6d69dffeabb6f6c26a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:52 -0700 Subject: [PATCH 04/30] direct_IO: use iov_iter_rw() instead of rw everywhere The rw parameter to direct_IO is redundant with iov_iter->type, and treated slightly differently just about everywhere it's used: some users do rw & WRITE, and others do rw == WRITE where they should be doing a bitwise check. Simplify this with the new iov_iter_rw() helper, which always returns either READ or WRITE. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- drivers/staging/lustre/lustre/llite/rw26.c | 18 +++++++++--------- fs/9p/vfs_addr.c | 2 +- fs/affs/file.c | 4 ++-- fs/btrfs/inode.c | 10 +++++----- fs/ext2/inode.c | 2 +- fs/ext3/inode.c | 8 ++++---- fs/ext4/ext4.h | 4 ++-- fs/ext4/indirect.c | 10 +++++----- fs/ext4/inode.c | 20 ++++++++++---------- fs/f2fs/data.c | 16 ++++++++-------- fs/fat/inode.c | 4 ++-- fs/fuse/file.c | 13 +++++++------ fs/gfs2/aops.c | 7 +++---- fs/hfs/inode.c | 2 +- fs/hfsplus/inode.c | 2 +- fs/jfs/inode.c | 2 +- fs/nfs/direct.c | 2 +- fs/nilfs2/inode.c | 4 ++-- fs/ocfs2/aops.c | 2 +- fs/reiserfs/inode.c | 2 +- fs/udf/inode.c | 2 +- fs/xfs/xfs_aops.c | 2 +- 22 files changed, 69 insertions(+), 69 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 2f21304046aa..3aa9de6bcc40 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -399,7 +399,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, * size changing by concurrent truncates and writes. * 1. Need inode mutex to operate transient pages. */ - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_lock(&inode->i_mutex); LASSERT(obj->cob_transient_pages == 0); @@ -408,7 +408,7 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, size_t offs; count = min_t(size_t, iov_iter_count(iter), size); - if (rw == READ) { + if (iov_iter_rw(iter) == READ) { if (file_offset >= i_size_read(inode)) break; if (file_offset + count > i_size_read(inode)) @@ -418,11 +418,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, result = iov_iter_get_pages_alloc(iter, &pages, count, &offs); if (likely(result > 0)) { int n = DIV_ROUND_UP(result + offs, PAGE_SIZE); - result = ll_direct_IO_26_seg(env, io, rw, inode, - file->f_mapping, - result, file_offset, - pages, n); - ll_free_user_pages(pages, n, rw==READ); + result = ll_direct_IO_26_seg(env, io, iov_iter_rw(iter), + inode, file->f_mapping, + result, file_offset, pages, + n); + ll_free_user_pages(pages, n, iov_iter_rw(iter) == READ); } if (unlikely(result <= 0)) { /* If we can't allocate a large enough buffer @@ -449,11 +449,11 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, } out: LASSERT(obj->cob_transient_pages == 0); - if (rw == READ) + if (iov_iter_rw(iter) == READ) mutex_unlock(&inode->i_mutex); if (tot_bytes > 0) { - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { struct lov_stripe_md *lsm; lsm = ccc_inode_lsm_get(inode); diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 2e38f9a5b472..dd5543b1d183 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -253,7 +253,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) struct file *file = iocb->ki_filp; ssize_t n; int err = 0; - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { n = p9_client_write(file->private_data, pos, iter, &err); if (n) { struct inode *inode = file_inode(file); diff --git a/fs/affs/file.c b/fs/affs/file.c index 1edc0d4b40db..7f05a468d594 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -398,7 +398,7 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t size = offset + count; if (AFFS_I(inode)->mmu_private < size) @@ -406,7 +406,7 @@ affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, } ret = blockdev_direct_IO(iocb, inode, iter, offset, affs_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) affs_write_failed(mapping, offset + count); return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9a3ff8a85fd..ca69e83d4f3c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8081,7 +8081,7 @@ free_ordered: bio_endio(dio_bio, ret); } -static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, +static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, const struct iov_iter *iter, loff_t offset) { int seg; @@ -8096,7 +8096,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io goto out; /* If this is a write we don't need to check anymore */ - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* * Check to make sure we don't have duplicate iov_base's in this @@ -8126,7 +8126,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, bool relock = false; ssize_t ret; - if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) + if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset)) return 0; atomic_inc(&inode->i_dio_count); @@ -8144,7 +8144,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, filemap_fdatawrite_range(inode->i_mapping, offset, offset + count - 1); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * If the write DIO is beyond the EOF, we need update * the isize, but it is protected by i_mutex. So we can @@ -8178,7 +8178,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 14e8d1752685..685e514c57dd 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -866,7 +866,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, else ret = blockdev_direct_IO(iocb, inode, iter, offset, ext2_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) ext2_write_failed(mapping, offset + count); return ret; } diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 6fb376c8d938..c70839d26ccd 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1832,9 +1832,9 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - trace_ext3_direct_IO_enter(inode, offset, count, rw); + trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -1861,7 +1861,7 @@ retry: * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; @@ -1908,7 +1908,7 @@ retry: ret = err; } out: - trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); + trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f63c3d5805c4..2031c994024e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2152,8 +2152,8 @@ extern void ext4_da_update_reserve_space(struct inode *inode, /* indirect.c */ extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); -extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset); +extern ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index cd81d01da0b0..3580629e42d3 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -642,8 +642,8 @@ out: * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. */ -ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -654,7 +654,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int retries = 0; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { loff_t final_size = offset + count; if (final_size > inode->i_size) { @@ -676,7 +676,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) { + if (iov_iter_rw(iter) == READ && ext4_should_dioread_nolock(inode)) { /* * Nolock dioread optimization may be dynamically disabled * via ext4_inode_block_unlocked_dio(). Check inode's state @@ -707,7 +707,7 @@ locked: ret = blockdev_direct_IO(iocb, inode, iter, offset, ext4_get_block); - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 76b8cba5d041..cf6ba6536035 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2952,8 +2952,8 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * if the machine crashes during the write. * */ -static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -2966,8 +2966,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ext4_io_end_t *io_end = NULL; /* Use the old path for reads and writes beyond i_size. */ - if (rw != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(rw, iocb, iter, offset); + if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size) + return ext4_ind_direct_IO(iocb, iter, offset); BUG_ON(iocb->private == NULL); @@ -2976,7 +2976,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, * conversion. This also disallows race between truncate() and * overwrite DIO as i_dio_count needs to be incremented under i_mutex. */ - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) atomic_inc(&inode->i_dio_count); /* If we do a overwrite dio, i_mutex locking can be released */ @@ -3078,7 +3078,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, } retake_lock: - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) inode_dio_done(inode); /* take i_mutex locking again if we do a ovewrite dio */ if (overwrite) { @@ -3107,12 +3107,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, count, rw); + trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iter, offset); + ret = ext4_ext_direct_IO(iocb, iter, offset); else - ret = ext4_ind_direct_IO(rw, iocb, iter, offset); - trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); + ret = ext4_ind_direct_IO(iocb, iter, offset); + trace_ext4_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); return ret; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e16adebcb9b6..ce25f62edfa7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1118,12 +1118,12 @@ static int f2fs_write_end(struct file *file, return copied; } -static int check_direct_IO(struct inode *inode, int rw, - struct iov_iter *iter, loff_t offset) +static int check_direct_IO(struct inode *inode, struct iov_iter *iter, + loff_t offset) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; - if (rw == READ) + if (iov_iter_rw(iter) == READ) return 0; if (offset & blocksize_mask) @@ -1151,19 +1151,19 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, return err; } - if (check_direct_IO(inode, rw, iter, offset)) + if (check_direct_IO(inode, iter, offset)) return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, rw); + trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - if (rw & WRITE) + if (iov_iter_rw(iter) == WRITE) __allocate_data_blocks(inode, offset, count); err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); - if (err < 0 && (rw & WRITE)) + if (err < 0 && iov_iter_rw(iter) == WRITE) f2fs_write_failed(mapping, offset + count); - trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); + trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); return err; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a1a39f571e78..342d791b28db 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -255,7 +255,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); ssize_t ret; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { /* * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. @@ -275,7 +275,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * condition of fat_get_block() and ->truncate(). */ ret = blockdev_direct_IO(iocb, inode, iter, offset, fat_get_block); - if (ret < 0 && (rw & WRITE)) + if (ret < 0 && iov_iter_rw(iter) == WRITE) fat_write_failed(mapping, offset + count); return ret; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e1afdd7abf90..c1a67da6a8a0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2800,11 +2800,11 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, inode = file->f_mapping->host; i_size = i_size_read(inode); - if ((rw == READ) && (offset > i_size)) + if ((iov_iter_rw(iter) == READ) && (offset > i_size)) return 0; /* optimization for short read */ - if (async_dio && rw != WRITE && offset + count > i_size) { + if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; count = min_t(loff_t, count, fuse_round_up(i_size - offset)); @@ -2819,7 +2819,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, io->bytes = -1; io->size = 0; io->offset = offset; - io->write = (rw == WRITE); + io->write = (iov_iter_rw(iter) == WRITE); io->err = 0; io->file = file; /* @@ -2834,13 +2834,14 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && + iov_iter_rw(iter) == WRITE) io->async = false; if (io->async && is_sync_kiocb(iocb)) io->done = &wait; - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { ret = generic_write_checks(file, &pos, &count, 0); if (!ret) { iov_iter_truncate(iter, count); @@ -2865,7 +2866,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, kfree(io); - if (rw == WRITE) { + if (iov_iter_rw(iter) == WRITE) { if (ret > 0) fuse_write_update_size(inode, pos); else if (ret < 0 && offset + count > i_size) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 59983a18cab4..e22e6e686a11 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1016,13 +1016,12 @@ out: /** * gfs2_ok_for_dio - check that dio is valid on this file * @ip: The inode - * @rw: READ or WRITE * @offset: The offset at which we are reading or writing * * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) * 1 (to accept the i/o request) */ -static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) +static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) { /* * Should we return an error here? I can't see that O_DIRECT for @@ -1061,7 +1060,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = gfs2_glock_nq(&gh); if (rv) return rv; - rv = gfs2_ok_for_dio(ip, rw, offset); + rv = gfs2_ok_for_dio(ip, offset); if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ @@ -1091,7 +1090,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, rv = filemap_write_and_wait_range(mapping, lstart, end); if (rv) goto out; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) truncate_inode_pages_range(mapping, lstart, end); } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index e92d175d1fd7..0085d527a55c 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -139,7 +139,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2a98dc07c22c..afcde36b506b 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -137,7 +137,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index c20f7883543f..e7047b63ffc5 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -345,7 +345,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c3929fb2ab26..e8482b8f4830 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -267,7 +267,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t #else VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); - if (rw == READ) + if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); return nfs_file_direct_write(iocb, iter, pos); #endif /* CONFIG_NFS_SWAP */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 3727b8caa46e..4726f1493d5d 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -314,7 +314,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); ssize_t size; - if (rw == WRITE) + if (iov_iter_rw(iter) == WRITE) return 0; /* Needs synchronization with the cleaner */ @@ -324,7 +324,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && size < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && size < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 68cb199fb2b6..0ee9474cca46 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -842,7 +842,7 @@ static ssize_t ocfs2_direct_IO(int rw, if (i_size_read(inode) <= offset && !full_coherency) return 0; - if (rw == READ) + if (iov_iter_rw(iter) == READ) return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, offset, ocfs2_direct_IO_get_blocks, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a51e9177b056..d61ab13244a8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3293,7 +3293,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ - if (unlikely((rw & WRITE) && ret < 0)) { + if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = offset + count; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 3adf49c01c19..a685aea93068 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -226,7 +226,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, ssize_t ret; ret = blockdev_direct_IO(iocb, inode, iter, offset, udf_get_block); - if (unlikely(ret < 0 && (rw & WRITE))) + if (unlikely(ret < 0 && iov_iter_rw(iter) == WRITE)) udf_write_failed(mapping, offset + count); return ret; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5ca504c66e85..532d5279df2f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1503,7 +1503,7 @@ xfs_vm_direct_IO( struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); - if (rw & WRITE) { + if (iov_iter_rw(iter) == WRITE) { return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, From 22c6186ecea0be9eff1c399298ad36e94a59995f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 16 Mar 2015 04:33:53 -0700 Subject: [PATCH 05/30] direct_IO: remove rw from a_ops->direct_IO() Now that no one is using rw, remove it completely. Signed-off-by: Omar Sandoval Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- Documentation/filesystems/vfs.txt | 2 +- drivers/staging/lustre/lustre/llite/rw26.c | 4 ++-- fs/9p/vfs_addr.c | 3 +-- fs/affs/file.c | 3 +-- fs/block_dev.c | 3 +-- fs/btrfs/inode.c | 4 ++-- fs/ceph/addr.c | 3 +-- fs/cifs/file.c | 3 +-- fs/exofs/inode.c | 4 ++-- fs/ext2/inode.c | 3 +-- fs/ext3/inode.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/f2fs/data.c | 4 ++-- fs/fat/inode.c | 3 +-- fs/fuse/file.c | 3 +-- fs/gfs2/aops.c | 4 ++-- fs/hfs/inode.c | 4 ++-- fs/hfsplus/inode.c | 4 ++-- fs/jfs/inode.c | 4 ++-- fs/nfs/direct.c | 3 +-- fs/nilfs2/inode.c | 3 +-- fs/ocfs2/aops.c | 4 +--- fs/reiserfs/inode.c | 4 ++-- fs/udf/file.c | 3 +-- fs/udf/inode.c | 3 +-- fs/xfs/xfs_aops.c | 1 - include/linux/fs.h | 2 +- include/linux/nfs_fs.h | 2 +- mm/filemap.c | 4 ++-- mm/page_io.c | 4 +--- 31 files changed, 42 insertions(+), 59 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index c3cd6279e92e..7cdbca44e343 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -196,7 +196,7 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 207cdca68bed..5d833b32bbcd 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -590,7 +590,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c index 3aa9de6bcc40..0d7ce6b0e23c 100644 --- a/drivers/staging/lustre/lustre/llite/rw26.c +++ b/drivers/staging/lustre/lustre/llite/rw26.c @@ -359,8 +359,8 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io, * up to 22MB for 128kB kmalloc and up to 682MB for 4MB kmalloc. */ #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \ ~(DT_MAX_BRW_SIZE - 1)) -static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t file_offset) +static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter, + loff_t file_offset) { struct lu_env *env; struct cl_io *io; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index dd5543b1d183..be35d05a4d0e 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -230,7 +230,6 @@ static int v9fs_launder_page(struct page *page) /** * v9fs_direct_IO - 9P address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -248,7 +247,7 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct file *file = iocb->ki_filp; ssize_t n; diff --git a/fs/affs/file.c b/fs/affs/file.c index 7f05a468d594..dcf27951781c 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -389,8 +389,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to) } static ssize_t -affs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +affs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/block_dev.c b/fs/block_dev.c index bc23afd35fdb..6e3de63c3055 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -146,8 +146,7 @@ blkdev_get_block(struct inode *inode, sector_t iblock, } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ca69e83d4f3c..43192e10cc43 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8114,8 +8114,8 @@ out: return retval; } -static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index fd5599d32362..155ab9c0246b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1198,8 +1198,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * intercept O_DIRECT reads and writes early, this function should * never get called. */ -static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { WARN_ON(1); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ca30c391a894..72394c5abd0f 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3877,8 +3877,7 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. */ static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* * FIXME diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index a198e94813fe..35073aaec6e0 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -963,8 +963,8 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ -static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t exofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { return 0; } diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 685e514c57dd..e1abf75e994c 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -851,8 +851,7 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c70839d26ccd..13c0868c7160 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1820,8 +1820,8 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * crashes then stale disk data _may_ be exposed inside the file. But current * VFS code falls back into buffered path in that case so we are safe. */ -static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cf6ba6536035..42c942a950e1 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3089,8 +3089,8 @@ retake_lock: return ret; } -static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce25f62edfa7..319eda511c4f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1135,8 +1135,8 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter, return 0; } -static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 342d791b28db..41b729933638 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -245,8 +245,7 @@ static int fat_write_end(struct file *file, struct address_space *mapping, return err; } -static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index c1a67da6a8a0..3d355e946991 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2782,8 +2782,7 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { DECLARE_COMPLETION_ONSTACK(wait); ssize_t ret = 0; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index e22e6e686a11..20dd33da92de 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1038,8 +1038,8 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) -static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 0085d527a55c..75fd5d873c19 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -124,8 +124,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index afcde36b506b..a43811f90935 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -122,8 +122,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) return res ? try_to_free_buffers(page) : 0; } -static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index e7047b63ffc5..070dc4b33544 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -330,8 +330,8 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, jfs_get_block); } -static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e8482b8f4830..06503bc604e1 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -240,7 +240,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, /** * nfs_direct_IO - NFS address space operation for direct I/O - * @rw: direction (read or write) * @iocb: target I/O control block * @iov: array of vectors that define I/O buffer * @pos: offset in file to begin the operation @@ -251,7 +250,7 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. */ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) +ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { struct inode *inode = iocb->ki_filp->f_mapping->host; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4726f1493d5d..36f057fa8aa3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -305,8 +305,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) +nilfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0ee9474cca46..28b5ad81bbec 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -818,9 +818,7 @@ out: return ret; } -static ssize_t ocfs2_direct_IO(int rw, - struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index d61ab13244a8..742242b60972 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3278,8 +3278,8 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) * We thank Mingming Cao for helping us understand in great detail what * to do in this section of the code. */ -static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, loff_t offset) +static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; diff --git a/fs/udf/file.c b/fs/udf/file.c index 74050bff64f4..78d42548b260 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -100,8 +100,7 @@ static int udf_adinicb_write_begin(struct file *file, return 0; } -static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_adinicb_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { /* Fallback to buffered I/O. */ diff --git a/fs/udf/inode.c b/fs/udf/inode.c index a685aea93068..4f178c83b04f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -215,8 +215,7 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, return ret; } -static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, - struct iov_iter *iter, +static ssize_t udf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 532d5279df2f..1d8eef9cf0f5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1495,7 +1495,6 @@ xfs_end_io_direct_write( STATIC ssize_t xfs_vm_direct_IO( - int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) diff --git a/include/linux/fs.h b/include/linux/fs.h index 295bc589fe1b..72e3759de8c3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -383,7 +383,7 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); + ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset); /* * migrate the contents of a page to the specified target. If * migrate_mode is MIGRATE_ASYNC, it must not block. diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b01ccf371fdc..3d1b0d2fe55e 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -447,7 +447,7 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) /* * linux/fs/nfs/direct.c */ -extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); +extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); diff --git a/mm/filemap.c b/mm/filemap.c index 876f4e6f3ed6..9920db455f05 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1707,7 +1707,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) pos + count - 1); if (!retval) { struct iov_iter data = *iter; - retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos); + retval = mapping->a_ops->direct_IO(iocb, &data, pos); } if (retval > 0) { @@ -2395,7 +2395,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos) } data = *from; - written = mapping->a_ops->direct_IO(WRITE, iocb, &data, pos); + written = mapping->a_ops->direct_IO(iocb, &data, pos); /* * Finally, try again to invalidate clean pages which might have been diff --git a/mm/page_io.c b/mm/page_io.c index a96c8562d835..6424869e275e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -277,9 +277,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, - &kiocb, &from, - kiocb.ki_pos); + ret = mapping->a_ops->direct_IO(&kiocb, &from, kiocb.ki_pos); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; From f765b134c0d3f294f6084d3e0a11de184059a387 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2015 20:50:38 -0400 Subject: [PATCH 06/30] new_sync_write(): discard ->ki_pos unless the return value is positive That allows ->write_iter() instances much more convenient life wrt iocb->ki_pos (and fixes several filesystems with borderline POSIX violations when zero-length write succeeds and changes the current position). Signed-off-by: Al Viro --- fs/read_write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/read_write.c b/fs/read_write.c index 45d583c33879..819ef3faf1bb 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -477,7 +477,8 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t ret = filp->f_op->write_iter(&kiocb, &iter); BUG_ON(ret == -EIOCBQUEUED); - *ppos = kiocb.ki_pos; + if (ret > 0) + *ppos = kiocb.ki_pos; return ret; } From ccca26835dc27f7ba54e09d7aa03f462684a1927 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 5 Apr 2015 14:06:24 -0400 Subject: [PATCH 07/30] ntfs: move iov_iter_truncate() closer to generic_write_checks() Signed-off-by: Al Viro --- fs/ntfs/file.c | 81 ++++++++++++++++++-------------------------------- 1 file changed, 29 insertions(+), 52 deletions(-) diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 840e95e3f1d2..77087d5ad458 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -328,26 +328,29 @@ err_out: return err; } -static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, - size_t *count) +static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, + struct iov_iter *from) { loff_t pos; s64 end, ll; ssize_t err; unsigned long flags; + struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; + size_t count = iov_iter_count(from); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " - "0x%llx, count 0x%lx.", vi->i_ino, + "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)*ppos, (unsigned long)*count); - /* We can write back this queue in page reclaim. */ - current->backing_dev_info = inode_to_bdi(vi); - err = generic_write_checks(file, ppos, count, S_ISBLK(vi->i_mode)); + (unsigned long long)iocb->ki_pos, count); + err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(vi->i_mode)); if (unlikely(err)) goto out; + iov_iter_truncate(from, count); + if (count == 0) + goto out; /* * All checks have passed. Before we start doing any writing we want * to abort any totally illegal writes. @@ -379,8 +382,6 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, err = -EOPNOTSUPP; goto out; } - if (*count == 0) - goto out; base_ni = ni; if (NInoAttr(ni)) base_ni = ni->ext.base_ntfs_ino; @@ -392,9 +393,9 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, * cannot fail either so there is no need to check the return code. */ file_update_time(file); - pos = *ppos; + pos = iocb->ki_pos; /* The first byte after the last cluster being written to. */ - end = (pos + *count + vol->cluster_size_mask) & + end = (pos + iov_iter_count(from) + vol->cluster_size_mask) & ~(u64)vol->cluster_size_mask; /* * If the write goes beyond the allocated size, extend the allocation @@ -422,7 +423,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, "partially extended.", vi->i_ino, (unsigned) le32_to_cpu(ni->type)); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } } else { err = ll; @@ -438,7 +439,7 @@ static ssize_t ntfs_prepare_file_for_write(struct file *file, loff_t *ppos, vi->i_ino, (unsigned) le32_to_cpu(ni->type), (int)-err); - *count = ll - pos; + iov_iter_truncate(from, ll - pos); } else { if (err != -ENOSPC) ntfs_error(vi->i_sb, "Cannot perform " @@ -1929,61 +1930,37 @@ again: return written ? written : status; } -/** - * ntfs_file_write_iter_nolock - write data to a file - * @iocb: IO state structure (file, offset, etc.) - * @from: iov_iter with data to write - * - * Basically the same as __generic_file_write_iter() except that it ends - * up calling ntfs_perform_write() instead of generic_perform_write() and that - * O_DIRECT is not implemented. - */ -static ssize_t ntfs_file_write_iter_nolock(struct kiocb *iocb, - struct iov_iter *from) -{ - struct file *file = iocb->ki_filp; - loff_t pos = iocb->ki_pos; - ssize_t written = 0; - ssize_t err; - size_t count = iov_iter_count(from); - - err = ntfs_prepare_file_for_write(file, &pos, &count); - if (count && !err) { - iov_iter_truncate(from, count); - written = ntfs_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; - } - current->backing_dev_info = NULL; - return written ? written : err; -} - /** * ntfs_file_write_iter - simple wrapper for ntfs_file_write_iter_nolock() * @iocb: IO state structure * @from: iov_iter with data to write * * Basically the same as generic_file_write_iter() except that it ends up - * calling ntfs_file_write_iter_nolock() instead of - * __generic_file_write_iter(). + * up calling ntfs_perform_write() instead of generic_perform_write() and that + * O_DIRECT is not implemented. */ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *vi = file_inode(file); - ssize_t ret; + ssize_t written = 0; + ssize_t err; mutex_lock(&vi->i_mutex); - ret = ntfs_file_write_iter_nolock(iocb, from); + /* We can write back this queue in page reclaim. */ + current->backing_dev_info = inode_to_bdi(vi); + err = ntfs_prepare_file_for_write(iocb, from); + if (iov_iter_count(from) && !err) + written = ntfs_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; mutex_unlock(&vi->i_mutex); - if (ret > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - ret, ret); + if (likely(written > 0)) { + err = generic_write_sync(file, iocb->ki_pos, written); if (err < 0) - ret = err; + written = 0; } - return ret; + iocb->ki_pos += written; + return written ? written : err; } /** From e9d1593d4e9311bca040ecf6ec7599e6f235140c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 6 Apr 2015 22:44:11 -0400 Subject: [PATCH 08/30] cifs: fold cifs_iovec_write() into the only caller Signed-off-by: Al Viro --- fs/cifs/file.c | 47 ++++++++++++++++------------------------------- 1 file changed, 16 insertions(+), 31 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 72394c5abd0f..3cb04129ddb1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2560,9 +2560,9 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, return rc; } -static ssize_t -cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; size_t len; ssize_t total_written = 0; struct cifsFileInfo *open_file; @@ -2573,8 +2573,14 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) struct iov_iter saved_from; int rc; + /* + * BB - optimize the way when signing is disabled. We can drop this + * extra memory-to-memory copying and use iovec buffers for constructing + * write request. + */ + len = iov_iter_count(from); - rc = generic_write_checks(file, poffset, &len, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &len, 0); if (rc) return rc; @@ -2593,7 +2599,7 @@ cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) memcpy(&saved_from, from, sizeof(struct iov_iter)); - rc = cifs_write_from_iter(*poffset, len, from, open_file, cifs_sb, + rc = cifs_write_from_iter(iocb->ki_pos, len, from, open_file, cifs_sb, &wdata_list); /* @@ -2633,7 +2639,7 @@ restart_loop: memcpy(&tmp_from, &saved_from, sizeof(struct iov_iter)); iov_iter_advance(&tmp_from, - wdata->offset - *poffset); + wdata->offset - iocb->ki_pos); rc = cifs_write_from_iter(wdata->offset, wdata->bytes, &tmp_from, @@ -2650,34 +2656,13 @@ restart_loop: kref_put(&wdata->refcount, cifs_uncached_writedata_release); } - if (total_written > 0) - *poffset += total_written; + if (unlikely(!total_written)) + return rc; + iocb->ki_pos += total_written; + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(file_inode(file))->flags); cifs_stats_bytes_written(tcon, total_written); - return total_written ? total_written : (ssize_t)rc; -} - -ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) -{ - ssize_t written; - struct inode *inode; - loff_t pos = iocb->ki_pos; - - inode = file_inode(iocb->ki_filp); - - /* - * BB - optimize the way when signing is disabled. We can drop this - * extra memory-to-memory copying and use iovec buffers for constructing - * write request. - */ - - written = cifs_iovec_write(iocb->ki_filp, from, &pos); - if (written > 0) { - set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); - iocb->ki_pos = pos; - } - - return written; + return total_written; } static ssize_t From 0b8def9d6dfa6b2a9a2740cf81d8d2c134688d39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 10:22:53 -0400 Subject: [PATCH 09/30] __generic_file_write_iter: keep ->ki_pos and return value consistent A side effect worth noting: in O_APPEND case we set ->ki_pos early, so if it turns out to be an error or a zero-length write, we'll end up with ->ki_pos modified. Safe, since all callers never look at the ->ki_pos after the call of __generic_file_write_iter() returning non-positive, all the way to caller of ->write_iter() and those discard ->ki_pos when getting that. Signed-off-by: Al Viro --- mm/filemap.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 9920db455f05..353f82e09e63 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2557,7 +2557,6 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct address_space * mapping = file->f_mapping; struct inode *inode = mapping->host; - loff_t pos = iocb->ki_pos; ssize_t written = 0; ssize_t err; ssize_t status; @@ -2565,7 +2564,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(inode->i_mode)); if (err) goto out; @@ -2583,9 +2582,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; if (io_is_direct(file)) { - loff_t endbyte; + loff_t pos, endbyte; - written = generic_file_direct_write(iocb, from, pos); + written = generic_file_direct_write(iocb, from, iocb->ki_pos); /* * If the write stopped short of completing, fall back to * buffered writes. Some filesystems do this for writes to @@ -2593,13 +2592,10 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) * not succeed (even if it did, DAX does not handle dirty * page-cache pages correctly). */ - if (written < 0 || written == count || IS_DAX(inode)) + if (written < 0 || !iov_iter_count(from) || IS_DAX(inode)) goto out; - pos += written; - count -= written; - - status = generic_perform_write(file, from, pos); + status = generic_perform_write(file, from, pos = iocb->ki_pos); /* * If generic_perform_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2611,15 +2607,15 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) err = status; goto out; } - iocb->ki_pos = pos + status; /* * We need to ensure that the page cache pages are written to * disk and invalidated to preserve the expected O_DIRECT * semantics. */ endbyte = pos + status - 1; - err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); + err = filemap_write_and_wait_range(mapping, pos, endbyte); if (err == 0) { + iocb->ki_pos = endbyte + 1; written += status; invalidate_mapping_pages(mapping, pos >> PAGE_CACHE_SHIFT, @@ -2631,9 +2627,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) */ } } else { - written = generic_perform_write(file, from, pos); - if (likely(written >= 0)) - iocb->ki_pos = pos + written; + written = generic_perform_write(file, from, iocb->ki_pos); + if (likely(written > 0)) + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; From 5f380c7fa7e01f15ca0816bd241ece9a64a73192 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 11:28:12 -0400 Subject: [PATCH 10/30] lift generic_write_checks() into callers of __generic_file_write_iter() Signed-off-by: Al Viro --- fs/block_dev.c | 10 ++++++++++ fs/cifs/file.c | 39 +++++++++++++++++++++++---------------- fs/ext4/file.c | 14 +++++++++++--- fs/udf/file.c | 10 ++++++++++ mm/filemap.c | 17 ++++++----------- 5 files changed, 60 insertions(+), 30 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 6e3de63c3055..bcd7f97beab9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1597,6 +1597,16 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct blk_plug plug; ssize_t ret; + size_t count = iov_iter_count(from); + + ret = generic_write_checks(file, &iocb->ki_pos, &count, 1); + if (ret) + return ret; + + if (count == 0) + return 0; + + iov_iter_truncate(from, count); blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3cb04129ddb1..3c5c9bc5cbaf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2673,8 +2673,8 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; - ssize_t rc = -EACCES; - loff_t lock_pos = iocb->ki_pos; + ssize_t rc; + size_t count; /* * We need to hold the sem to be sure nobody modifies lock list @@ -2682,23 +2682,30 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) */ down_read(&cinode->lock_sem); mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - lock_pos = i_size_read(inode); - if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), + + count = iov_iter_count(from); + rc = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (rc) + goto out; + + if (count == 0) + goto out; + + iov_iter_truncate(from, count); + + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { + CIFS_WRITE_OP)) rc = __generic_file_write_iter(iocb, from); - mutex_unlock(&inode->i_mutex); + else + rc = -EACCES; +out: + mutex_unlock(&inode->i_mutex); - if (rc > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - } else { - mutex_unlock(&inode->i_mutex); + if (rc > 0) { + ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc); + if (err < 0) + rc = err; } up_read(&cinode->lock_sem); return rc; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 9ad03036d9f5..f7cca423dded 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -132,9 +132,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EFBIG; goto errout; } - - if (pos + length > sbi->s_bitmap_maxbytes) - iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + length = iov_iter_count(from); } iocb->private = &overwrite; @@ -172,7 +171,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } + ret = generic_write_checks(file, &iocb->ki_pos, &length, 0); + if (ret) + goto out; + + if (length == 0) + goto out; + + iov_iter_truncate(from, length); ret = __generic_file_write_iter(iocb, from); +out: mutex_unlock(&inode->i_mutex); if (ret > 0) { diff --git a/fs/udf/file.c b/fs/udf/file.c index 78d42548b260..35e81ed99405 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -151,7 +151,17 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } else up_write(&iinfo->i_data_sem); + retval = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (retval) + goto out; + + if (count == 0) + goto out; + + iov_iter_truncate(from, count); + retval = __generic_file_write_iter(iocb, from); +out: mutex_unlock(&inode->i_mutex); if (retval > 0) { diff --git a/mm/filemap.c b/mm/filemap.c index 353f82e09e63..a794a7f98743 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2560,19 +2560,9 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t written = 0; ssize_t err; ssize_t status; - size_t count = iov_iter_count(from); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(inode->i_mode)); - if (err) - goto out; - - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - err = file_remove_suid(file); if (err) goto out; @@ -2651,9 +2641,14 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; + size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = __generic_file_write_iter(iocb, from); + ret = generic_write_checks(file, &iocb->ki_pos, &count, 0); + if (!ret && count) { + iov_iter_truncate(from, count); + ret = __generic_file_write_iter(iocb, from); + } mutex_unlock(&inode->i_mutex); if (ret > 0) { From 7ec7b94a3339756dfbb88234e3e45a428e8c08fb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 11:35:14 -0400 Subject: [PATCH 11/30] blkdev_write_iter: expand generic_file_checks() call in there Signed-off-by: Al Viro --- fs/block_dev.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index bcd7f97beab9..897ee0503932 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1595,18 +1595,21 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; + struct inode *bd_inode = file->f_mapping->host; + loff_t size = i_size_read(bd_inode); struct blk_plug plug; ssize_t ret; - size_t count = iov_iter_count(from); - ret = generic_write_checks(file, &iocb->ki_pos, &count, 1); - if (ret) - return ret; + if (bdev_read_only(I_BDEV(bd_inode))) + return -EPERM; - if (count == 0) + if (!iov_iter_count(from)) return 0; - iov_iter_truncate(from, count); + if (iocb->ki_pos >= size) + return -ENOSPC; + + iov_iter_truncate(from, size - iocb->ki_pos); blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); From 0fa6b005afdb3152ce85df963302e59b61115f9b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 4 Apr 2015 04:05:48 -0400 Subject: [PATCH 12/30] generic_write_checks(): drop isblk argument all remaining callers are passing 0; some just obscure that fact. Signed-off-by: Al Viro --- fs/9p/vfs_file.c | 2 +- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 2 +- fs/cifs/file.c | 4 +-- fs/ext4/file.c | 2 +- fs/fuse/file.c | 6 ++--- fs/ncpfs/file.c | 2 +- fs/nfs/direct.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/file.c | 3 +-- fs/udf/file.c | 2 +- fs/xfs/xfs_file.c | 2 +- include/linux/fs.h | 2 +- mm/filemap.c | 63 +++++++++++++++------------------------------- 14 files changed, 36 insertions(+), 60 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index d7fcb775311e..b5b020ace1b3 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -409,7 +409,7 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); int err = 0; - retval = generic_write_checks(file, &origin, &count, 0); + retval = generic_write_checks(file, &origin, &count); if (retval) return retval; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index cdc801c85105..691a84a81e09 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1747,7 +1747,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, mutex_lock(&inode->i_mutex); current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 56237ea5fc22..761841903160 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -953,7 +953,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3c5c9bc5cbaf..4202e74b2db5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2580,7 +2580,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) */ len = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &len, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &len); if (rc) return rc; @@ -2684,7 +2684,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) mutex_lock(&inode->i_mutex); count = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &count, 0); + rc = generic_write_checks(file, &iocb->ki_pos, &count); if (rc) goto out; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index f7cca423dded..1f0afc181b7b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -171,7 +171,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } - ret = generic_write_checks(file, &iocb->ki_pos, &length, 0); + ret = generic_write_checks(file, &iocb->ki_pos, &length); if (ret) goto out; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 3d355e946991..4c04a8144a75 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1167,7 +1167,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); + err = generic_write_checks(file, &pos, &count); if (err) goto out; @@ -1420,7 +1420,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = generic_write_checks(file, &iocb->ki_pos, &count, 0); + res = generic_write_checks(file, &iocb->ki_pos, &count); if (!res) { iov_iter_truncate(from, count); res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); @@ -2841,7 +2841,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { - ret = generic_write_checks(file, &pos, &count, 0); + ret = generic_write_checks(file, &pos, &count); if (!ret) { iov_iter_truncate(iter, count); ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 479bf8db264e..ab6363b16556 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -177,7 +177,7 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) void *bouncebuffer; ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(file, &pos, &count, 0); + errno = generic_write_checks(file, &pos, &count); if (errno) return errno; iov_iter_truncate(from, count); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 06503bc604e1..5ddd77acb3f7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -977,7 +977,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, count, (long long) pos); - result = generic_write_checks(file, &pos, &count, 0); + result = generic_write_checks(file, &pos, &count); if (result) goto out; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 77087d5ad458..cec4ec3c1ede 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -345,7 +345,7 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), (unsigned long long)iocb->ki_pos, count); - err = generic_write_checks(file, &iocb->ki_pos, &count, S_ISBLK(vi->i_mode)); + err = generic_write_checks(file, &iocb->ki_pos, &count); if (unlikely(err)) goto out; iov_iter_truncate(from, count); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0a6ec7e6efd8..1c11314946cb 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2374,8 +2374,7 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, ppos, &count, - S_ISBLK(inode->i_mode)); + ret = generic_write_checks(file, ppos, &count); if (ret) goto out_dio; diff --git a/fs/udf/file.c b/fs/udf/file.c index 35e81ed99405..6834509a7e5a 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -151,7 +151,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } else up_write(&iinfo->i_data_sem); - retval = generic_write_checks(file, &iocb->ki_pos, &count, 0); + retval = generic_write_checks(file, &iocb->ki_pos, &count); if (retval) goto out; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 44856c3b9617..43c0e6686c47 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -554,7 +554,7 @@ xfs_file_aio_write_checks( int error = 0; restart: - error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); + error = generic_write_checks(file, pos, count); if (error) return error; diff --git a/include/linux/fs.h b/include/linux/fs.h index 72e3759de8c3..492948ea4c9b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); +int generic_write_checks(struct file *file, loff_t *pos, size_t *count); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index a794a7f98743..dfc573c6ec25 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2260,7 +2260,7 @@ EXPORT_SYMBOL(read_cache_page_gfp); * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */ -inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk) +inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) { struct inode *inode = file->f_mapping->host; unsigned long limit = rlimit(RLIMIT_FSIZE); @@ -2268,20 +2268,17 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos < 0)) return -EINVAL; - if (!isblk) { - /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) - *pos = i_size_read(inode); + /* FIXME: this is for backwards compatibility with 2.4 */ + if (file->f_flags & O_APPEND) + *pos = i_size_read(inode); - if (limit != RLIM_INFINITY) { - if (*pos >= limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - if (*count > limit - (typeof(limit))*pos) { - *count = limit - (typeof(limit))*pos; - } + if (limit != RLIM_INFINITY) { + if (*pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; } + if (*count > limit - (typeof(limit))*pos) + *count = limit - (typeof(limit))*pos; } /* @@ -2289,12 +2286,10 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i */ if (unlikely(*pos + *count > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { - if (*pos >= MAX_NON_LFS) { + if (*pos >= MAX_NON_LFS) return -EFBIG; - } - if (*count > MAX_NON_LFS - (unsigned long)*pos) { + if (*count > MAX_NON_LFS - (unsigned long)*pos) *count = MAX_NON_LFS - (unsigned long)*pos; - } } /* @@ -2304,33 +2299,15 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. */ - if (likely(!isblk)) { - if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { - if (*count || *pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - /* zero-length writes at ->s_maxbytes are OK */ + if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { + if (*count || *pos > inode->i_sb->s_maxbytes) { + return -EFBIG; } - - if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) - *count = inode->i_sb->s_maxbytes - *pos; - } else { -#ifdef CONFIG_BLOCK - loff_t isize; - if (bdev_read_only(I_BDEV(inode))) - return -EPERM; - isize = i_size_read(inode); - if (*pos >= isize) { - if (*count || *pos > isize) - return -ENOSPC; - } - - if (*pos + *count > isize) - *count = isize - *pos; -#else - return -EPERM; -#endif + /* zero-length writes at ->s_maxbytes are OK */ } + + if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) + *count = inode->i_sb->s_maxbytes - *pos; return 0; } EXPORT_SYMBOL(generic_write_checks); @@ -2644,7 +2621,7 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &count, 0); + ret = generic_write_checks(file, &iocb->ki_pos, &count); if (!ret && count) { iov_iter_truncate(from, count); ret = __generic_file_write_iter(iocb, from); From 99733fa372eaaa59cfb93fd383cee7b0ff056e1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 14:25:18 -0400 Subject: [PATCH 13/30] xfs_file_aio_write_checks: switch to iocb/iov_iter Signed-off-by: Al Viro --- fs/xfs/xfs_file.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 43c0e6686c47..ebde43e15dd9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -544,17 +544,18 @@ xfs_zero_eof( */ STATIC ssize_t xfs_file_aio_write_checks( - struct file *file, - loff_t *pos, - size_t *count, + struct kiocb *iocb, + struct iov_iter *from, int *iolock) { + struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); int error = 0; + size_t count = iov_iter_count(from); restart: - error = generic_write_checks(file, pos, count); + error = generic_write_checks(file, &iocb->ki_pos, &count); if (error) return error; @@ -569,7 +570,7 @@ restart: * iolock shared, we need to update it to exclusive which implies * having to redo all checks before. */ - if (*pos > i_size_read(inode)) { + if (iocb->ki_pos > i_size_read(inode)) { bool zero = false; if (*iolock == XFS_IOLOCK_SHARED) { @@ -578,10 +579,11 @@ restart: xfs_rw_ilock(ip, *iolock); goto restart; } - error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero); + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); if (error) return error; } + iov_iter_truncate(from, count); /* * Updating the timestamps will grab the ilock again from @@ -678,10 +680,11 @@ xfs_file_dio_aio_write( xfs_rw_ilock(ip, iolock); } - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); + count = iov_iter_count(from); + pos = iocb->ki_pos; if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, @@ -734,24 +737,22 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); - ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; - iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_perform_write(file, from, pos); + trace_xfs_file_buffered_write(ip, iov_iter_count(from), + iocb->ki_pos, 0); + ret = generic_perform_write(file, from, iocb->ki_pos); if (likely(ret >= 0)) - iocb->ki_pos = pos + ret; + iocb->ki_pos += ret; /* * If we hit a space limit, try to free up some lingering preallocated From e768d7ff7b923a74a019d8782e6ee75dc1de12c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 14:48:22 -0400 Subject: [PATCH 14/30] ext4_file_write_iter: move generic_write_checks() up simpler that way... Signed-off-by: Al Viro --- fs/ext4/file.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1f0afc181b7b..42b1fa33a17a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -99,7 +99,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) int overwrite = 0; size_t length = iov_iter_count(from); ssize_t ret; - loff_t pos = iocb->ki_pos; + loff_t pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -109,15 +109,22 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && (file->f_flags & O_APPEND || - ext4_unaligned_aio(inode, from, pos))) { + ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); ext4_unwritten_wait(inode); } mutex_lock(&inode->i_mutex); - if (file->f_flags & O_APPEND) - iocb->ki_pos = pos = i_size_read(inode); + ret = generic_write_checks(file, &iocb->ki_pos, &length); + if (ret) + goto out; + + if (length == 0) + goto out; + + iov_iter_truncate(from, length); + pos = iocb->ki_pos; /* * If we have encountered a bitmap-format file, the size limit @@ -126,18 +133,16 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if ((pos > sbi->s_bitmap_maxbytes) || - (pos == sbi->s_bitmap_maxbytes && length > 0)) { - mutex_unlock(&inode->i_mutex); + if (pos >= sbi->s_bitmap_maxbytes) { ret = -EFBIG; - goto errout; + goto out; } iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); - length = iov_iter_count(from); } iocb->private = &overwrite; if (o_direct) { + length = iov_iter_count(from); blk_start_plug(&plug); @@ -171,16 +176,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } } - ret = generic_write_checks(file, &iocb->ki_pos, &length); - if (ret) - goto out; - - if (length == 0) - goto out; - - iov_iter_truncate(from, length); ret = __generic_file_write_iter(iocb, from); -out: mutex_unlock(&inode->i_mutex); if (ret > 0) { @@ -193,7 +189,12 @@ out: if (o_direct) blk_finish_plug(&plug); -errout: + if (aio_mutex) + mutex_unlock(aio_mutex); + return ret; + +out: + mutex_unlock(&inode->i_mutex); if (aio_mutex) mutex_unlock(aio_mutex); return ret; From 6b775b18eecf60b8a44723e05f8eb6251b71a7a9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 15:06:19 -0400 Subject: [PATCH 15/30] fuse: ->direct_IO() doesn't need generic_write_checks() already done by caller. We used to call __fuse_direct_write(), which called generic_write_checks(); now the former got expanded, bringing the latter to the surface. It used to be called all along and calling it from there had been wrong all along... Signed-off-by: Al Viro --- fs/fuse/file.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 4c04a8144a75..8c15d0a077e8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2806,8 +2806,8 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) if (async_dio && iov_iter_rw(iter) != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; - count = min_t(loff_t, count, fuse_round_up(i_size - offset)); - iov_iter_truncate(iter, count); + iov_iter_truncate(iter, fuse_round_up(i_size - offset)); + count = iov_iter_count(iter); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2841,12 +2841,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) io->done = &wait; if (iov_iter_rw(iter) == WRITE) { - ret = generic_write_checks(file, &pos, &count); - if (!ret) { - iov_iter_truncate(iter, count); - ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); - } - + ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); fuse_invalidate_attr(inode); } else { ret = __fuse_direct_read(io, iter, &pos); From 165f1a6e300d5a1ffb57cf9a9c8762de731228f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 7 Apr 2015 15:26:36 -0400 Subject: [PATCH 16/30] udf_file_write_iter: reorder and simplify it's easier to do generic_write_checks() first Signed-off-by: Al Viro --- fs/udf/file.c | 46 ++++++++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/fs/udf/file.c b/fs/udf/file.c index 6834509a7e5a..ccab8b78e363 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -120,36 +120,11 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t retval; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - int err, pos; size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); + int err; mutex_lock(&inode->i_mutex); - down_write(&iinfo->i_data_sem); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - if (file->f_flags & O_APPEND) - pos = inode->i_size; - else - pos = iocb->ki_pos; - - if (inode->i_sb->s_blocksize < - (udf_file_entry_alloc_offset(inode) + - pos + count)) { - err = udf_expand_file_adinicb(inode); - if (err) { - mutex_unlock(&inode->i_mutex); - udf_debug("udf_expand_adinicb: err=%d\n", err); - return err; - } - } else { - if (pos + count > inode->i_size) - iinfo->i_lenAlloc = pos + count; - else - iinfo->i_lenAlloc = inode->i_size; - up_write(&iinfo->i_data_sem); - } - } else - up_write(&iinfo->i_data_sem); retval = generic_write_checks(file, &iocb->ki_pos, &count); if (retval) @@ -160,6 +135,25 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) iov_iter_truncate(from, count); + down_write(&iinfo->i_data_sem); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { + loff_t end = iocb->ki_pos + iov_iter_count(from); + + if (inode->i_sb->s_blocksize < + (udf_file_entry_alloc_offset(inode) + end)) { + err = udf_expand_file_adinicb(inode); + if (err) { + mutex_unlock(&inode->i_mutex); + udf_debug("udf_expand_adinicb: err=%d\n", err); + return err; + } + } else { + iinfo->i_lenAlloc = max(end, inode->i_size); + up_write(&iinfo->i_data_sem); + } + } else + up_write(&iinfo->i_data_sem); + retval = __generic_file_write_iter(iocb, from); out: mutex_unlock(&inode->i_mutex); From 5dc3161cb63265adca0c34fac79512af59b776a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 07:25:03 -0400 Subject: [PATCH 17/30] ocfs2_file_write_iter: stop messing with ppos it's &iocb->ki_pos; no need to obfuscate. Signed-off-by: Al Viro --- fs/ocfs2/file.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8096fb6c081b..78e245df5e32 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2264,7 +2264,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, int can_do_direct, has_refcount = 0; ssize_t written = 0; size_t count = iov_iter_count(from); - loff_t old_size, *ppos = &iocb->ki_pos; + loff_t old_size; u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -2330,7 +2330,7 @@ relock: } can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending, + ret = ocfs2_prepare_inode_for_write(file, &iocb->ki_pos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2338,7 +2338,7 @@ relock: } if (direct_io && !is_sync_kiocb(iocb)) - unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos); + unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos); /* * We can't complete the direct I/O as requested, fall back to @@ -2374,7 +2374,7 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, ppos, &count); + ret = generic_write_checks(file, &iocb->ki_pos, &count); if (ret) goto out_dio; @@ -2382,7 +2382,7 @@ relock: if (direct_io) { loff_t endbyte; ssize_t written_buffered; - written = generic_file_direct_write(iocb, from, *ppos); + written = generic_file_direct_write(iocb, from, iocb->ki_pos); if (written < 0 || written == count) { ret = written; goto out_dio; @@ -2392,7 +2392,7 @@ relock: * for completing the rest of the request. */ count -= written; - written_buffered = generic_perform_write(file, from, *ppos); + written_buffered = generic_perform_write(file, from, iocb->ki_pos); /* * If generic_file_buffered_write() returned a synchronous error * then we want to return the number of bytes which were @@ -2409,14 +2409,14 @@ relock: * disk and invalidated to preserve the expected O_DIRECT * semantics. */ - endbyte = *ppos + written_buffered - 1; - ret = filemap_write_and_wait_range(file->f_mapping, *ppos, + endbyte = iocb->ki_pos + written_buffered - 1; + ret = filemap_write_and_wait_range(file->f_mapping, iocb->ki_pos, endbyte); if (ret == 0) { - iocb->ki_pos = *ppos + written_buffered; + iocb->ki_pos += written_buffered; written += written_buffered; invalidate_mapping_pages(mapping, - *ppos >> PAGE_CACHE_SHIFT, + iocb->ki_pos >> PAGE_CACHE_SHIFT, endbyte >> PAGE_CACHE_SHIFT); } else { /* @@ -2426,9 +2426,9 @@ relock: } } else { current->backing_dev_info = inode_to_bdi(inode); - written = generic_perform_write(file, from, *ppos); + written = generic_perform_write(file, from, iocb->ki_pos); if (likely(written >= 0)) - iocb->ki_pos = *ppos + written; + iocb->ki_pos = iocb->ki_pos + written; current->backing_dev_info = NULL; } From 90320251db0fe3d05f2b10686ec936c7d6ecd99a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 11:14:45 -0400 Subject: [PATCH 18/30] ocfs2: move generic_write_checks() before the alignment checks Alignment checks for dio depend upon the range truncation done by generic_write_checks(). They can be done as soon as we got ocfs2_rw_lock() and that actually makes ocfs2_prepare_inode_for_write() simpler. The only thing to watch out for is restoring the original count in "unlock and redo without dio" case. Position doesn't need to be restored, since we change it only in O_APPEND case and in that case it will be reassigned anyway. Signed-off-by: Al Viro --- fs/ocfs2/file.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 78e245df5e32..fc53ff065364 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2106,7 +2106,7 @@ out: } static int ocfs2_prepare_inode_for_write(struct file *file, - loff_t *ppos, + loff_t pos, size_t count, int appending, int *direct_io, @@ -2115,7 +2115,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, int ret = 0, meta_level = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - loff_t saved_pos = 0, end; + loff_t end; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); @@ -2155,23 +2155,16 @@ static int ocfs2_prepare_inode_for_write(struct file *file, } } - /* work on a copy of ppos until we're sure that we won't have - * to recalculate it due to relocking. */ - if (appending) - saved_pos = i_size_read(inode); - else - saved_pos = *ppos; + end = pos + count; - end = saved_pos + count; - - ret = ocfs2_check_range_for_refcount(inode, saved_pos, count); + ret = ocfs2_check_range_for_refcount(inode, pos, count); if (ret == 1) { ocfs2_inode_unlock(inode, meta_level); meta_level = -1; ret = ocfs2_prepare_inode_for_refcount(inode, file, - saved_pos, + pos, count, &meta_level); if (has_refcount) @@ -2227,7 +2220,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * caller will have to retake some cluster * locks and initiate the io as buffered. */ - ret = ocfs2_check_range_for_holes(inode, saved_pos, count); + ret = ocfs2_check_range_for_holes(inode, pos, count); if (ret == 1) { /* * Fallback to old way if the feature bit is not set. @@ -2242,12 +2235,9 @@ static int ocfs2_prepare_inode_for_write(struct file *file, break; } - if (appending) - *ppos = saved_pos; - out_unlock: trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, - saved_pos, appending, count, + pos, appending, count, direct_io, has_refcount); if (meta_level >= 0) @@ -2263,7 +2253,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t count = iov_iter_count(from); + size_t count = iov_iter_count(from), orig_count; loff_t old_size; u32 old_clusters; struct file *file = iocb->ki_filp; @@ -2329,8 +2319,16 @@ relock: ocfs2_inode_unlock(inode, 1); } + orig_count = count; + ret = generic_write_checks(file, &iocb->ki_pos, &count); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + iov_iter_truncate(from, count); + can_do_direct = direct_io; - ret = ocfs2_prepare_inode_for_write(file, &iocb->ki_pos, count, appending, + ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); @@ -2351,6 +2349,7 @@ relock: rw_level = -1; direct_io = 0; + iov_iter_reexpand(from, count = orig_count); goto relock; } @@ -2374,11 +2373,6 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (ret) - goto out_dio; - - iov_iter_truncate(from, count); if (direct_io) { loff_t endbyte; ssize_t written_buffered; From 3309dd04cbcd2cdad168485af5cf3576b5051e49 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 12:55:47 -0400 Subject: [PATCH 19/30] switch generic_write_checks() to iocb and iter ... returning -E... upon error and amount of data left in iter after (possible) truncation upon success. Note, that normal case gives a non-zero (positive) return value, so any tests for != 0 _must_ be updated. Signed-off-by: Al Viro Conflicts: fs/ext4/file.c --- fs/9p/vfs_file.c | 26 ++++++++++---------------- fs/btrfs/file.c | 26 ++++++++++---------------- fs/ceph/file.c | 14 ++++++-------- fs/cifs/file.c | 26 ++++++-------------------- fs/ext4/file.c | 20 ++++++-------------- fs/fuse/file.c | 22 +++++++--------------- fs/ncpfs/file.c | 14 +++++--------- fs/nfs/direct.c | 25 ++++++++++--------------- fs/ntfs/file.c | 11 ++++------- fs/ocfs2/file.c | 19 ++++++++++--------- fs/udf/file.c | 10 ++-------- fs/xfs/xfs_file.c | 8 ++++---- include/linux/fs.h | 2 +- mm/filemap.c | 44 +++++++++++++++++++------------------------- 14 files changed, 100 insertions(+), 167 deletions(-) diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index b5b020ace1b3..2a9dd37dc426 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -404,21 +404,16 @@ static ssize_t v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - ssize_t retval = 0; - loff_t origin = iocb->ki_pos; - size_t count = iov_iter_count(from); + ssize_t retval; + loff_t origin; int err = 0; - retval = generic_write_checks(file, &origin, &count); - if (retval) + retval = generic_write_checks(iocb, from); + if (retval <= 0) return retval; - iov_iter_truncate(from, count); - - if (!count) - return 0; - - retval = p9_client_write(file->private_data, origin, from, &err); + origin = iocb->ki_pos; + retval = p9_client_write(file->private_data, iocb->ki_pos, from, &err); if (retval > 0) { struct inode *inode = file_inode(file); loff_t i_size; @@ -428,12 +423,11 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (inode->i_mapping && inode->i_mapping->nrpages) invalidate_inode_pages2_range(inode->i_mapping, pg_start, pg_end); - origin += retval; + iocb->ki_pos += retval; i_size = i_size_read(inode); - iocb->ki_pos = origin; - if (origin > i_size) { - inode_add_bytes(inode, origin - i_size); - i_size_write(inode, origin); + if (iocb->ki_pos > i_size) { + inode_add_bytes(inode, iocb->ki_pos - i_size); + i_size_write(inode, iocb->ki_pos); } return retval; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 691a84a81e09..c64d11c41eeb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1739,27 +1739,19 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, u64 start_pos; u64 end_pos; ssize_t num_written = 0; - ssize_t err = 0; - size_t count = iov_iter_count(from); bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); - loff_t pos = iocb->ki_pos; + ssize_t err; + loff_t pos; + size_t count; mutex_lock(&inode->i_mutex); + err = generic_write_checks(iocb, from); + if (err <= 0) { + mutex_unlock(&inode->i_mutex); + return err; + } current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - if (count == 0) { - mutex_unlock(&inode->i_mutex); - goto out; - } - - iov_iter_truncate(from, count); - err = file_remove_suid(file); if (err) { mutex_unlock(&inode->i_mutex); @@ -1786,6 +1778,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, */ update_time_for_write(inode); + pos = iocb->ki_pos; + count = iov_iter_count(from); start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 761841903160..3f0b9339d823 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -941,9 +941,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; - ssize_t count = iov_iter_count(from), written = 0; + ssize_t count, written = 0; int err, want, got; - loff_t pos = iocb->ki_pos; + loff_t pos; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; @@ -953,14 +953,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - if (count == 0) - goto out; - iov_iter_truncate(from, count); - + pos = iocb->ki_pos; + count = iov_iter_count(from); err = file_remove_suid(file); if (err) goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 4202e74b2db5..ca2bc5406306 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2563,7 +2563,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from, ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - size_t len; ssize_t total_written = 0; struct cifsFileInfo *open_file; struct cifs_tcon *tcon; @@ -2579,16 +2578,10 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) * write request. */ - len = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &len); - if (rc) + rc = generic_write_checks(iocb, from); + if (rc <= 0) return rc; - if (!len) - return 0; - - iov_iter_truncate(from, len); - INIT_LIST_HEAD(&wdata_list); cifs_sb = CIFS_FILE_SB(file); open_file = file->private_data; @@ -2599,8 +2592,8 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) memcpy(&saved_from, from, sizeof(struct iov_iter)); - rc = cifs_write_from_iter(iocb->ki_pos, len, from, open_file, cifs_sb, - &wdata_list); + rc = cifs_write_from_iter(iocb->ki_pos, iov_iter_count(from), from, + open_file, cifs_sb, &wdata_list); /* * If at least one write was successfully sent, then discard any rc @@ -2674,7 +2667,6 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc; - size_t count; /* * We need to hold the sem to be sure nobody modifies lock list @@ -2683,16 +2675,10 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) down_read(&cinode->lock_sem); mutex_lock(&inode->i_mutex); - count = iov_iter_count(from); - rc = generic_write_checks(file, &iocb->ki_pos, &count); - if (rc) + rc = generic_write_checks(iocb, from); + if (rc <= 0) goto out; - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, CIFS_WRITE_OP)) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 42b1fa33a17a..c10785f10d1d 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -97,9 +97,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; int o_direct = io_is_direct(file); int overwrite = 0; - size_t length = iov_iter_count(from); ssize_t ret; - loff_t pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -116,16 +114,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) } mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &length); - if (ret) + ret = generic_write_checks(iocb, from); + if (ret <= 0) goto out; - if (length == 0) - goto out; - - iov_iter_truncate(from, length); - pos = iocb->ki_pos; - /* * If we have encountered a bitmap-format file, the size limit * is smaller than s_maxbytes, which is for extent-mapped files. @@ -133,19 +125,19 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (pos >= sbi->s_bitmap_maxbytes) { + if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) { ret = -EFBIG; goto out; } - iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); } iocb->private = &overwrite; if (o_direct) { - length = iov_iter_count(from); + size_t length = iov_iter_count(from); + loff_t pos = iocb->ki_pos; blk_start_plug(&plug); - /* check whether we do a DIO overwrite or not */ if (ext4_should_dioread_nolock(inode) && !aio_mutex && !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8c15d0a077e8..b86c8e08399a 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1145,13 +1145,11 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = iov_iter_count(from); ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; loff_t endbyte = 0; - loff_t pos = iocb->ki_pos; if (get_fuse_conn(inode)->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ @@ -1167,14 +1165,10 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = generic_write_checks(file, &pos, &count); - if (err) + err = generic_write_checks(iocb, from); + if (err <= 0) goto out; - if (count == 0) - goto out; - - iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) goto out; @@ -1184,6 +1178,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; if (file->f_flags & O_DIRECT) { + loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) goto out; @@ -1209,9 +1204,9 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) written += written_buffered; iocb->ki_pos = pos + written_buffered; } else { - written = fuse_perform_write(file, mapping, from, pos); + written = fuse_perform_write(file, mapping, from, iocb->ki_pos); if (written >= 0) - iocb->ki_pos = pos + written; + iocb->ki_pos += written; } out: current->backing_dev_info = NULL; @@ -1412,7 +1407,6 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct fuse_io_priv io = { .async = 0, .file = file }; - size_t count = iov_iter_count(from); ssize_t res; if (is_bad_inode(inode)) @@ -1420,11 +1414,9 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = generic_write_checks(file, &iocb->ki_pos, &count); - if (!res) { - iov_iter_truncate(from, count); + res = generic_write_checks(iocb, from); + if (res > 0) res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); - } fuse_invalidate_attr(inode); if (res > 0) fuse_write_update_size(inode, iocb->ki_pos); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index ab6363b16556..011324ce9df2 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -170,20 +170,15 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); size_t already_written = 0; - loff_t pos = iocb->ki_pos; - size_t count = iov_iter_count(from); size_t bufsize; int errno; void *bouncebuffer; + off_t pos; ncp_dbg(1, "enter %pD2\n", file); - errno = generic_write_checks(file, &pos, &count); - if (errno) + errno = generic_write_checks(iocb, from); + if (errno <= 0) return errno; - iov_iter_truncate(from, count); - - if (!count) - return 0; errno = ncp_make_open(inode, O_WRONLY); if (errno) { @@ -201,10 +196,11 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from) errno = -EIO; /* -ENOMEM */ goto outrel; } + pos = iocb->ki_pos; while (iov_iter_count(from)) { int written_this_time; size_t to_write = min_t(size_t, - bufsize - ((off_t)pos % bufsize), + bufsize - (pos % bufsize), iov_iter_count(from)); if (copy_from_iter(bouncebuffer, to_write, from) != to_write) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5ddd77acb3f7..9634189b8545 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -969,24 +969,19 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; loff_t end; - size_t count = iov_iter_count(iter); - end = (pos + count - 1) >> PAGE_CACHE_SHIFT; - - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", - file, count, (long long) pos); + file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(file, &pos, &count); - if (result) + result = generic_write_checks(iocb, iter); + if (result <= 0) goto out; - result = -EINVAL; - if ((ssize_t) count < 0) - goto out; - result = 0; - if (!count) - goto out; + nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, + iov_iter_count(iter)); + + pos = iocb->ki_pos; + end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT; mutex_lock(&inode->i_mutex); @@ -1001,7 +996,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; } - task_io_account_write(count); + task_io_account_write(iov_iter_count(iter)); result = -ENOMEM; dreq = nfs_direct_req_alloc(); @@ -1009,7 +1004,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out_unlock; dreq->inode = inode; - dreq->bytes_left = count; + dreq->bytes_left = iov_iter_count(iter); dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index cec4ec3c1ede..7bb487e663b4 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -339,17 +339,14 @@ static ssize_t ntfs_prepare_file_for_write(struct kiocb *iocb, struct inode *vi = file_inode(file); ntfs_inode *base_ni, *ni = NTFS_I(vi); ntfs_volume *vol = ni->vol; - size_t count = iov_iter_count(from); ntfs_debug("Entering for i_ino 0x%lx, attribute type 0x%x, pos " "0x%llx, count 0x%zx.", vi->i_ino, (unsigned)le32_to_cpu(ni->type), - (unsigned long long)iocb->ki_pos, count); - err = generic_write_checks(file, &iocb->ki_pos, &count); - if (unlikely(err)) - goto out; - iov_iter_truncate(from, count); - if (count == 0) + (unsigned long long)iocb->ki_pos, + iov_iter_count(from)); + err = generic_write_checks(iocb, from); + if (unlikely(err <= 0)) goto out; /* * All checks have passed. Before we start doing any writing we want diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index fc53ff065364..b93919f50f0f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2250,9 +2250,10 @@ out: static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int ret, direct_io, appending, rw_level, have_alloc_sem = 0; + int direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; + ssize_t ret; size_t count = iov_iter_count(from), orig_count; loff_t old_size; u32 old_clusters; @@ -2319,13 +2320,14 @@ relock: ocfs2_inode_unlock(inode, 1); } - orig_count = count; - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (ret < 0) { - mlog_errno(ret); + orig_count = iov_iter_count(from); + ret = generic_write_checks(iocb, from); + if (ret <= 0) { + if (ret) + mlog_errno(ret); goto out; } - iov_iter_truncate(from, count); + count = ret; can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending, @@ -2349,7 +2351,7 @@ relock: rw_level = -1; direct_io = 0; - iov_iter_reexpand(from, count = orig_count); + iov_iter_reexpand(from, orig_count); goto relock; } @@ -2377,7 +2379,7 @@ relock: loff_t endbyte; ssize_t written_buffered; written = generic_file_direct_write(iocb, from, iocb->ki_pos); - if (written < 0 || written == count) { + if (written < 0 || !iov_iter_count(from)) { ret = written; goto out_dio; } @@ -2385,7 +2387,6 @@ relock: /* * for completing the rest of the request. */ - count -= written; written_buffered = generic_perform_write(file, from, iocb->ki_pos); /* * If generic_file_buffered_write() returned a synchronous error diff --git a/fs/udf/file.c b/fs/udf/file.c index ccab8b78e363..3de2edafff73 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -120,21 +120,15 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ssize_t retval; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - size_t count = iov_iter_count(from); struct udf_inode_info *iinfo = UDF_I(inode); int err; mutex_lock(&inode->i_mutex); - retval = generic_write_checks(file, &iocb->ki_pos, &count); - if (retval) + retval = generic_write_checks(iocb, from); + if (retval <= 0) goto out; - if (count == 0) - goto out; - - iov_iter_truncate(from, count); - down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { loff_t end = iocb->ki_pos + iov_iter_count(from); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ebde43e15dd9..28d157807b42 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -551,12 +551,12 @@ xfs_file_aio_write_checks( struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); - int error = 0; + ssize_t error = 0; size_t count = iov_iter_count(from); restart: - error = generic_write_checks(file, &iocb->ki_pos, &count); - if (error) + error = generic_write_checks(iocb, from); + if (error <= 0) return error; error = xfs_break_layouts(inode, iolock); @@ -577,13 +577,13 @@ restart: xfs_rw_iunlock(ip, *iolock); *iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, *iolock); + iov_iter_reexpand(from, count); goto restart; } error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); if (error) return error; } - iov_iter_truncate(from, count); /* * Updating the timestamps will grab the ilock again from diff --git a/include/linux/fs.h b/include/linux/fs.h index c7b21db7782f..b4aa400ac723 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2566,7 +2566,7 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); -int generic_write_checks(struct file *file, loff_t *pos, size_t *count); +extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/mm/filemap.c b/mm/filemap.c index dfc573c6ec25..243997a26e7c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2260,36 +2260,38 @@ EXPORT_SYMBOL(read_cache_page_gfp); * Returns appropriate error code that caller should return or * zero in case that write should be allowed. */ -inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) +inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) { + struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; unsigned long limit = rlimit(RLIMIT_FSIZE); + loff_t pos; - if (unlikely(*pos < 0)) - return -EINVAL; + if (!iov_iter_count(from)) + return 0; /* FIXME: this is for backwards compatibility with 2.4 */ if (file->f_flags & O_APPEND) - *pos = i_size_read(inode); + iocb->ki_pos = i_size_read(inode); + + pos = iocb->ki_pos; if (limit != RLIM_INFINITY) { - if (*pos >= limit) { + if (iocb->ki_pos >= limit) { send_sig(SIGXFSZ, current, 0); return -EFBIG; } - if (*count > limit - (typeof(limit))*pos) - *count = limit - (typeof(limit))*pos; + iov_iter_truncate(from, limit - (unsigned long)pos); } /* * LFS rule */ - if (unlikely(*pos + *count > MAX_NON_LFS && + if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS && !(file->f_flags & O_LARGEFILE))) { - if (*pos >= MAX_NON_LFS) + if (pos >= MAX_NON_LFS) return -EFBIG; - if (*count > MAX_NON_LFS - (unsigned long)*pos) - *count = MAX_NON_LFS - (unsigned long)*pos; + iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos); } /* @@ -2299,16 +2301,11 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count) * exceeded without writing data we send a signal and return EFBIG. * Linus frestrict idea will clean these up nicely.. */ - if (unlikely(*pos >= inode->i_sb->s_maxbytes)) { - if (*count || *pos > inode->i_sb->s_maxbytes) { - return -EFBIG; - } - /* zero-length writes at ->s_maxbytes are OK */ - } + if (unlikely(pos >= inode->i_sb->s_maxbytes)) + return -EFBIG; - if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) - *count = inode->i_sb->s_maxbytes - *pos; - return 0; + iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos); + return iov_iter_count(from); } EXPORT_SYMBOL(generic_write_checks); @@ -2618,14 +2615,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - size_t count = iov_iter_count(from); mutex_lock(&inode->i_mutex); - ret = generic_write_checks(file, &iocb->ki_pos, &count); - if (!ret && count) { - iov_iter_truncate(from, count); + ret = generic_write_checks(iocb, from); + if (ret > 0) ret = __generic_file_write_iter(iocb, from); - } mutex_unlock(&inode->i_mutex); if (ret > 0) { From 2ba48ce513c4e545318d22b138861d5876edf906 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 13:52:01 -0400 Subject: [PATCH 20/30] mirror O_APPEND and O_DIRECT into iocb->ki_flags ... avoiding write_iter/fcntl races. Signed-off-by: Al Viro --- fs/aio.c | 2 +- fs/btrfs/file.c | 2 +- fs/ceph/file.c | 8 ++++---- fs/ext4/file.c | 4 ++-- fs/fuse/file.c | 2 +- fs/gfs2/file.c | 2 +- fs/nfs/file.c | 6 +++--- fs/ocfs2/file.c | 10 +++++----- fs/xfs/xfs_file.c | 4 ++-- include/linux/fs.h | 15 +++++++++++++++ mm/filemap.c | 6 +++--- 11 files changed, 38 insertions(+), 23 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 5785c4b58fea..e976185c8e5b 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1502,7 +1502,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } req->common.ki_pos = iocb->aio_offset; req->common.ki_complete = aio_complete; - req->common.ki_flags = 0; + req->common.ki_flags = iocb_flags(req->common.ki_filp); if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c64d11c41eeb..faa7d390841b 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1794,7 +1794,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (sync) atomic_inc(&BTRFS_I(inode)->sync_writers); - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from, pos); } else { num_written = __btrfs_buffered_write(file, from, pos); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3f0b9339d823..b9b8eb225f66 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -457,7 +457,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (ret < 0) return ret; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { while (iov_iter_count(i)) { size_t start; ssize_t n; @@ -828,7 +828,7 @@ again: return ret; if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || - (iocb->ki_filp->f_flags & O_DIRECT) || + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", @@ -995,12 +995,12 @@ retry_snap: inode, ceph_vinop(inode), pos, count, ceph_cap_string(got)); if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || - (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) { struct iov_iter data; mutex_unlock(&inode->i_mutex); /* we might need to revert back to that point */ data = *from; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) written = ceph_sync_direct_write(iocb, &data, pos); else written = ceph_sync_write(iocb, &data, pos); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index c10785f10d1d..53bbc0b1995f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); struct mutex *aio_mutex = NULL; struct blk_plug plug; - int o_direct = io_is_direct(file); + int o_direct = iocb->ki_flags & IOCB_DIRECT; int overwrite = 0; ssize_t ret; @@ -106,7 +106,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && - (file->f_flags & O_APPEND || + (iocb->ki_flags & IOCB_APPEND || ext4_unaligned_aio(inode, from, iocb->ki_pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index b86c8e08399a..5ef05b5c4cff 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1177,7 +1177,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (file->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos = iocb->ki_pos; written = generic_file_direct_write(iocb, from, pos); if (written < 0 || !iov_iter_count(from)) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 614bb42cb7e1..08329afa1339 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -709,7 +709,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { struct gfs2_holder gh; ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f6a3adedf027..14364dc001f7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -170,7 +170,7 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; - if (iocb->ki_filp->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_read(iocb, to, iocb->ki_pos); dprintk("NFS: read(%pD2, %zu@%lu)\n", @@ -680,7 +680,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result) return result; - if (file->f_flags & O_DIRECT) + if (iocb->ki_flags & IOCB_DIRECT) return nfs_file_direct_write(iocb, from, pos); dprintk("NFS: write(%pD2, %zu@%Ld)\n", @@ -692,7 +692,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) /* * O_APPEND implies that we must revalidate the file length. */ - if (file->f_flags & O_APPEND) { + if (iocb->ki_flags & IOCB_APPEND) { result = nfs_revalidate_file_size(inode, file); if (result) goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index b93919f50f0f..cd37f6cd4d51 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2274,8 +2274,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, if (count == 0) return 0; - appending = file->f_flags & O_APPEND ? 1 : 0; - direct_io = file->f_flags & O_DIRECT ? 1 : 0; + appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0; + direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0; mutex_lock(&inode->i_mutex); @@ -2429,7 +2429,7 @@ relock: out_dio: /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; @@ -2546,7 +2546,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ - if (filp->f_flags & O_DIRECT) { + if (iocb->ki_flags & IOCB_DIRECT) { have_alloc_sem = 1; ocfs2_iocb_set_sem_locked(iocb); @@ -2580,7 +2580,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); + BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 28d157807b42..1f12ad0a8585 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -279,7 +279,7 @@ xfs_file_read_iter( XFS_STATS_INC(xs_read_calls); - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ioflags |= XFS_IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= XFS_IO_INVIS; @@ -804,7 +804,7 @@ xfs_file_write_iter( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - if (unlikely(file->f_flags & O_DIRECT)) + if (unlikely(iocb->ki_flags & IOCB_DIRECT)) ret = xfs_file_dio_aio_write(iocb, from); else ret = xfs_file_buffered_aio_write(iocb, from); diff --git a/include/linux/fs.h b/include/linux/fs.h index b4aa400ac723..b1d7db28c13c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,8 @@ struct address_space; struct writeback_control; #define IOCB_EVENTFD (1 << 0) +#define IOCB_APPEND (1 << 1) +#define IOCB_DIRECT (1 << 2) struct kiocb { struct file *ki_filp; @@ -329,10 +331,13 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) return kiocb->ki_complete == NULL; } +static inline int iocb_flags(struct file *file); + static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) { *kiocb = (struct kiocb) { .ki_filp = filp, + .ki_flags = iocb_flags(filp), }; } @@ -2779,6 +2784,16 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(file_inode(filp)); } +static inline int iocb_flags(struct file *file) +{ + int res = 0; + if (file->f_flags & O_APPEND) + res |= IOCB_APPEND; + if (io_is_direct(file)) + res |= IOCB_DIRECT; + return res; +} + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; diff --git a/mm/filemap.c b/mm/filemap.c index 243997a26e7c..405de370e657 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1694,7 +1694,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) loff_t *ppos = &iocb->ki_pos; loff_t pos = *ppos; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); @@ -2271,7 +2271,7 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) return 0; /* FIXME: this is for backwards compatibility with 2.4 */ - if (file->f_flags & O_APPEND) + if (iocb->ki_flags & IOCB_APPEND) iocb->ki_pos = i_size_read(inode); pos = iocb->ki_pos; @@ -2545,7 +2545,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (err) goto out; - if (io_is_direct(file)) { + if (iocb->ki_flags & IOCB_DIRECT) { loff_t pos, endbyte; written = generic_file_direct_write(iocb, from, iocb->ki_pos); From 7da839c475894ea872ec909a5d2e83dddccff5be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:01:33 -0400 Subject: [PATCH 21/30] ocfs2: use __generic_file_write_iter() we can do that now - all we need is to clear IOCB_DIRECT from ->ki_flags in "can't do dio" case. Signed-off-by: Al Viro --- fs/ocfs2/file.c | 64 ++++++------------------------------------------- 1 file changed, 7 insertions(+), 57 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index cd37f6cd4d51..913fc250d85a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2259,11 +2259,11 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, u32 old_clusters; struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - struct address_space *mapping = file->f_mapping; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); int unaligned_dio = 0; + int dropped_dio = 0; trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -2351,7 +2351,9 @@ relock: rw_level = -1; direct_io = 0; + iocb->ki_flags &= ~IOCB_DIRECT; iov_iter_reexpand(from, orig_count); + dropped_dio = 1; goto relock; } @@ -2375,67 +2377,15 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - if (direct_io) { - loff_t endbyte; - ssize_t written_buffered; - written = generic_file_direct_write(iocb, from, iocb->ki_pos); - if (written < 0 || !iov_iter_count(from)) { - ret = written; - goto out_dio; - } - - /* - * for completing the rest of the request. - */ - written_buffered = generic_perform_write(file, from, iocb->ki_pos); - /* - * If generic_file_buffered_write() returned a synchronous error - * then we want to return the number of bytes which were - * direct-written, or the error code if that was zero. Note - * that this differs from normal direct-io semantics, which - * will return -EFOO even if some bytes were written. - */ - if (written_buffered < 0) { - ret = written_buffered; - goto out_dio; - } - - /* We need to ensure that the page cache pages are written to - * disk and invalidated to preserve the expected O_DIRECT - * semantics. - */ - endbyte = iocb->ki_pos + written_buffered - 1; - ret = filemap_write_and_wait_range(file->f_mapping, iocb->ki_pos, - endbyte); - if (ret == 0) { - iocb->ki_pos += written_buffered; - written += written_buffered; - invalidate_mapping_pages(mapping, - iocb->ki_pos >> PAGE_CACHE_SHIFT, - endbyte >> PAGE_CACHE_SHIFT); - } else { - /* - * We don't know how much we wrote, so just return - * the number of bytes which were direct-written - */ - } - } else { - current->backing_dev_info = inode_to_bdi(inode); - written = generic_perform_write(file, from, iocb->ki_pos); - if (likely(written >= 0)) - iocb->ki_pos = iocb->ki_pos + written; - current->backing_dev_info = NULL; - } - -out_dio: + written = __generic_file_write_iter(iocb, from); /* buffered aio wouldn't have proper lock coverage today */ - BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); + BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT)); if (unlikely(written <= 0)) goto no_sync; - if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || - ((file->f_flags & O_DIRECT) && !direct_io)) { + if (((file->f_flags & O_DSYNC) && !direct_io) || + IS_SYNC(inode) || dropped_dio) { ret = filemap_fdatawrite_range(file->f_mapping, iocb->ki_pos - written, iocb->ki_pos - 1); From 65a4a1cad7c56e7056fb4b35ac2d93695612612c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 9 Apr 2015 14:11:08 -0400 Subject: [PATCH 22/30] nfs: generic_write_checks() shouldn't be done on swapout... Signed-off-by: Al Viro --- fs/nfs/direct.c | 12 +++--------- fs/nfs/file.c | 11 +++++++---- include/linux/nfs_fs.h | 3 +-- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9634189b8545..682f65fe09b5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -268,7 +268,7 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos) if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter, pos); - return nfs_file_direct_write(iocb, iter, pos); + return nfs_file_direct_write(iocb, iter); #endif /* CONFIG_NFS_SWAP */ } @@ -959,8 +959,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -968,15 +967,11 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, struct inode *inode = mapping->host; struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; - loff_t end; + loff_t pos, end; dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", file, iov_iter_count(iter), (long long) iocb->ki_pos); - result = generic_write_checks(iocb, iter); - if (result <= 0) - goto out; - nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, iov_iter_count(iter)); @@ -1044,7 +1039,6 @@ out_release: nfs_direct_req_release(dreq); out_unlock: mutex_unlock(&inode->i_mutex); -out: return result; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 14364dc001f7..c40e4363e746 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -674,17 +674,20 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) unsigned long written = 0; ssize_t result; size_t count = iov_iter_count(from); - loff_t pos = iocb->ki_pos; result = nfs_key_timeout_notify(file, inode); if (result) return result; - if (iocb->ki_flags & IOCB_DIRECT) - return nfs_file_direct_write(iocb, from, pos); + if (iocb->ki_flags & IOCB_DIRECT) { + result = generic_write_checks(iocb, from); + if (result <= 0) + return result; + return nfs_file_direct_write(iocb, from); + } dprintk("NFS: write(%pD2, %zu@%Ld)\n", - file, count, (long long) pos); + file, count, (long long) iocb->ki_pos); result = -EBUSY; if (IS_SWAPFILE(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 3d1b0d2fe55e..410abd172feb 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -452,8 +452,7 @@ extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter *iter, - loff_t pos); + struct iov_iter *iter); /* * linux/fs/nfs/dir.c From 525d27b23555419e0e7b73fb6e78d4d678cb4f32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 11 Feb 2015 13:40:17 +0000 Subject: [PATCH 23/30] VFS: Add owner-filesystem positive/negative dentry checks Supply two functions to test whether a filesystem's own dentries are positive or negative (d_really_is_positive() and d_really_is_negative()). The problem is that the DCACHE_ENTRY_TYPE field of dentry->d_flags may be overridden by the union part of a layered filesystem and isn't thus necessarily indicative of the type of dentry. Normally, this would involve a negative dentry (ie. ->d_inode == NULL) having ->d_layer.lower pointed to a lower layer dentry, DCACHE_PINNING_LOWER set and the DCACHE_ENTRY_TYPE field set to something other than DCACHE_MISS_TYPE - but it could also involve, say, a DCACHE_SPECIAL_TYPE being overridden to DCACHE_WHITEOUT_TYPE if a 0,0 chardev is detected in the top layer. However, inside a filesystem, when that fs is looking at its own dentries, it probably wants to know if they are really negative or not - and doesn't care about the fallthrough bits used by the union. To this end, a filesystem should normally use d_really_is_positive/negative() when looking at its own dentries rather than d_is_positive/negative() and should use d_inode() to get at the inode. Anyone looking at someone else's dentries (this includes pathwalk) should use d_is_xxx() and d_backing_inode(). Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/dcache.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d8358799c594..e83768ee38fc 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -482,6 +482,44 @@ static inline bool d_is_positive(const struct dentry *dentry) return !d_is_negative(dentry); } +/** + * d_really_is_negative - Determine if a dentry is really negative (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents either an absent name or a name that + * doesn't map to an inode (ie. ->d_inode is NULL). The dentry could represent + * a true miss, a whiteout that isn't represented by a 0,0 chardev or a + * fallthrough marker in an opaque directory. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. (3) The dentry may have something attached to ->d_lower and the + * type field of the flags may be set to something other than miss or whiteout. + */ +static inline bool d_really_is_negative(const struct dentry *dentry) +{ + return dentry->d_inode == NULL; +} + +/** + * d_really_is_positive - Determine if a dentry is really positive (ignoring fallthroughs) + * @dentry: The dentry in question + * + * Returns true if the dentry represents a name that maps to an inode + * (ie. ->d_inode is not NULL). The dentry might still represent a whiteout if + * that is represented on medium as a 0,0 chardev. + * + * Note! (1) This should be used *only* by a filesystem to examine its own + * dentries. It should not be used to look at some other filesystem's + * dentries. (2) It should also be used in combination with d_inode() to get + * the inode. + */ +static inline bool d_really_is_positive(const struct dentry *dentry) +{ + return dentry->d_inode != NULL; +} + extern void d_set_fallthru(struct dentry *dentry); static inline bool d_is_fallthru(const struct dentry *dentry) From 4bf46a272647d89e780126b52eda04737defd9f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 5 Mar 2015 14:09:22 +0000 Subject: [PATCH 24/30] VFS: Impose ordering on accesses of d_inode and d_flags Impose ordering on accesses of d_inode and d_flags to avoid the need to do this: if (!dentry->d_inode || d_is_negative(dentry)) { when this: if (d_is_negative(dentry)) { should suffice. This check is especially problematic if a dentry can have its type field set to something other than DENTRY_MISS_TYPE when d_inode is NULL (as in unionmount). What we really need to do is stick a write barrier between setting d_inode and setting d_flags and a read barrier between reading d_flags and reading d_inode. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/dcache.c | 47 +++++++++++++++++++++++++++++++++++------- include/linux/dcache.h | 21 +++---------------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index d99736a63e3c..656ce522a218 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,6 +269,41 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +/* + * Make sure other CPUs see the inode attached before the type is set. + */ +static inline void __d_set_inode_and_type(struct dentry *dentry, + struct inode *inode, + unsigned type_flags) +{ + unsigned flags; + + dentry->d_inode = inode; + smp_wmb(); + flags = READ_ONCE(dentry->d_flags); + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + flags |= type_flags; + WRITE_ONCE(dentry->d_flags, flags); +} + +/* + * Ideally, we want to make sure that other CPUs see the flags cleared before + * the inode is detached, but this is really a violation of RCU principles + * since the ordering suggests we should always set inode before flags. + * + * We should instead replace or discard the entire dentry - but that sucks + * performancewise on mass deletion/rename. + */ +static inline void __d_clear_type_and_inode(struct dentry *dentry) +{ + unsigned flags = READ_ONCE(dentry->d_flags); + + flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); + WRITE_ONCE(dentry->d_flags, flags); + smp_wmb(); + dentry->d_inode = NULL; +} + static void dentry_free(struct dentry *dentry) { WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); @@ -311,7 +346,7 @@ static void dentry_iput(struct dentry * dentry) { struct inode *inode = dentry->d_inode; if (inode) { - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -335,8 +370,7 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; - __d_clear_type(dentry); - dentry->d_inode = NULL; + __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); @@ -1715,11 +1749,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); spin_lock(&dentry->d_lock); - dentry->d_flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); - dentry->d_flags |= add_flags; if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); - dentry->d_inode = inode; + __d_set_inode_and_type(dentry, inode, add_flags); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); @@ -1937,8 +1969,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) add_flags |= DCACHE_DISCONNECTED; spin_lock(&tmp->d_lock); - tmp->d_inode = inode; - tmp->d_flags |= add_flags; + __d_set_inode_and_type(tmp, inode, add_flags); hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e83768ee38fc..df334cbacc6d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -404,26 +404,11 @@ static inline bool d_mountpoint(const struct dentry *dentry) /* * Directory cache entry type accessor functions. */ -static inline void __d_set_type(struct dentry *dentry, unsigned type) -{ - dentry->d_flags = (dentry->d_flags & ~DCACHE_ENTRY_TYPE) | type; -} - -static inline void __d_clear_type(struct dentry *dentry) -{ - __d_set_type(dentry, DCACHE_MISS_TYPE); -} - -static inline void d_set_type(struct dentry *dentry, unsigned type) -{ - spin_lock(&dentry->d_lock); - __d_set_type(dentry, type); - spin_unlock(&dentry->d_lock); -} - static inline unsigned __d_entry_type(const struct dentry *dentry) { - return dentry->d_flags & DCACHE_ENTRY_TYPE; + unsigned type = READ_ONCE(dentry->d_flags); + smp_rmb(); + return type & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) From 88e7fbd4a599375a08876e80a76d92e49fdea55c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Mar 2015 16:38:26 +0000 Subject: [PATCH 25/30] NFS: Don't use d_inode as a variable name Don't use d_inode as a variable name as it now masks a function name. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/nfs/read.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 568ecf0a880f..b8f5c63f77b2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -117,15 +117,15 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, static void nfs_readpage_release(struct nfs_page *req) { - struct inode *d_inode = req->wb_context->dentry->d_inode; + struct inode *inode = req->wb_context->dentry->d_inode; - dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), req->wb_bytes, (long long)req_offset(req)); if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + nfs_readpage_to_fscache(inode, req->wb_page, 0); unlock_page(req->wb_page); } From 698934df8b45da2a06816ee2d7f9a9034e671e62 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 17:33:52 +0000 Subject: [PATCH 26/30] VFS: Combine inode checks with d_is_negative() and d_is_positive() in pathwalk Where we have: if (!dentry->d_inode || d_is_negative(dentry)) { type constructions in pathwalk we should be able to eliminate the check of d_inode and rely solely on the result of d_is_negative() or d_is_positive(). What we do have to take care to do is to read d_inode after calling a d_is_xxx() typecheck function to get the barriering right. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 76fb76a0818b..5a9291c31881 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1585,7 +1585,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } err = -ENOENT; - if (!inode || d_is_negative(path->dentry)) + if (d_is_negative(path->dentry)) goto out_path_put; if (should_follow_link(path->dentry, follow)) { @@ -2310,7 +2310,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) mutex_unlock(&dir->d_inode->i_mutex); done: - if (!dentry->d_inode || d_is_negative(dentry)) { + if (d_is_negative(dentry)) { error = -ENOENT; dput(dentry); goto out; @@ -3038,7 +3038,7 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (!inode || d_is_negative(path->dentry)) { + if (d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } From 7ceab50c0be56cf1bbaf2b3dd1c6cda80e5335fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 5 Mar 2015 12:46:49 +0000 Subject: [PATCH 27/30] VFS: Fix up debugfs to use d_is_dir() in place of S_ISDIR() Fix up debugfs to use d_is_dir(dentry) in place of S_ISDIR(dentry->d_inode->i_mode). Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/debugfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 96400ab42d13..26856ecdea5e 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -521,7 +521,7 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent) if (debugfs_positive(dentry)) { dget(dentry); - if (S_ISDIR(dentry->d_inode->i_mode)) + if (d_is_dir(dentry)) ret = simple_rmdir(parent->d_inode, dentry); else simple_unlink(parent->d_inode, dentry); From 4bbcbd3b11dbc676a272be508e47d1c4a5056349 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 17 Mar 2015 22:16:40 +0000 Subject: [PATCH 28/30] VFS: Make pathwalk use d_is_reg() rather than S_ISREG() Make pathwalk use d_is_reg() rather than S_ISREG() to determine whether to honour O_TRUNC. Since this occurs after complete_walk(), the dentry type field cannot change and the inode pointer cannot change as we hold a ref on the dentry, so this should be safe. Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/namei.c b/fs/namei.c index 5a9291c31881..ffab2e06e147 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3077,7 +3077,7 @@ finish_open: error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) goto out; - if (!S_ISREG(nd->inode->i_mode)) + if (!d_is_reg(nd->path.dentry)) will_truncate = false; if (will_truncate) { From 6683de3886a313ae3d4b8c0323313a987073481b Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 2 Mar 2015 16:40:32 +0000 Subject: [PATCH 29/30] configfs: Fix inconsistent use of file_inode() vs file->f_path.dentry->d_inode Fix inconsistent use of file_inode() vs file->f_path.dentry->d_inode. Reported-by: Dan Carpenter Signed-off-by: David Howells Signed-off-by: Al Viro --- fs/configfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf0db005d2f5..acb3d63bc9dc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - mutex_unlock(&file_inode(file)->i_mutex); + mutex_unlock(&dentry->d_inode->i_mutex); return -EINVAL; } if (offset != file->f_pos) { From aa4d86163e4e91a1ac560954a554bab417e338f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2015 18:23:29 +0200 Subject: [PATCH 30/30] block: loop: switch to VFS ITER_BVEC Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- drivers/block/loop.c | 310 ++++++++++++++++++------------------------- 1 file changed, 128 insertions(+), 182 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c4fd1e45ce1e..ae3fcb4199e9 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,28 +88,6 @@ static int part_shift; static struct workqueue_struct *loop_wq; -/* - * Transfer functions - */ -static int transfer_none(struct loop_device *lo, int cmd, - struct page *raw_page, unsigned raw_off, - struct page *loop_page, unsigned loop_off, - int size, sector_t real_block) -{ - char *raw_buf = kmap_atomic(raw_page) + raw_off; - char *loop_buf = kmap_atomic(loop_page) + loop_off; - - if (cmd == READ) - memcpy(loop_buf, raw_buf, size); - else - memcpy(raw_buf, loop_buf, size); - - kunmap_atomic(loop_buf); - kunmap_atomic(raw_buf); - cond_resched(); - return 0; -} - static int transfer_xor(struct loop_device *lo, int cmd, struct page *raw_page, unsigned raw_off, struct page *loop_page, unsigned loop_off, @@ -148,14 +126,13 @@ static int xor_init(struct loop_device *lo, const struct loop_info64 *info) static struct loop_func_table none_funcs = { .number = LO_CRYPT_NONE, - .transfer = transfer_none, -}; +}; static struct loop_func_table xor_funcs = { .number = LO_CRYPT_XOR, .transfer = transfer_xor, .init = xor_init -}; +}; /* xfer_funcs[0] is special - its release function is never called */ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { @@ -215,209 +192,171 @@ lo_do_transfer(struct loop_device *lo, int cmd, struct page *lpage, unsigned loffs, int size, sector_t rblock) { - if (unlikely(!lo->transfer)) + int ret; + + ret = lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + if (likely(!ret)) return 0; - return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock); + printk_ratelimited(KERN_ERR + "loop: Transfer error at byte offset %llu, length %i.\n", + (unsigned long long)rblock << 9, size); + return ret; } -/** - * __do_lo_send_write - helper for writing data to a loop device - * - * This helper just factors out common code between do_lo_send_direct_write() - * and do_lo_send_write(). - */ -static int __do_lo_send_write(struct file *file, - u8 *buf, const int len, loff_t pos) +static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { - struct kvec kvec = {.iov_base = buf, .iov_len = len}; - struct iov_iter from; + struct iov_iter i; ssize_t bw; - iov_iter_kvec(&from, ITER_KVEC | WRITE, &kvec, 1, len); + iov_iter_bvec(&i, ITER_BVEC, bvec, 1, bvec->bv_len); file_start_write(file); - bw = vfs_iter_write(file, &from, &pos); + bw = vfs_iter_write(file, &i, ppos); file_end_write(file); - if (likely(bw == len)) + + if (likely(bw == bvec->bv_len)) return 0; - printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", - (unsigned long long)pos, len); + + printk_ratelimited(KERN_ERR + "loop: Write error at byte offset %llu, length %i.\n", + (unsigned long long)*ppos, bvec->bv_len); if (bw >= 0) bw = -EIO; return bw; } -/** - * do_lo_send_direct_write - helper for writing data to a loop device - * - * This is the fast, non-transforming version that does not need double - * buffering. - */ -static int do_lo_send_direct_write(struct loop_device *lo, - struct bio_vec *bvec, loff_t pos, struct page *page) +static int lo_write_simple(struct loop_device *lo, struct request *rq, + loff_t pos) { - ssize_t bw = __do_lo_send_write(lo->lo_backing_file, - kmap(bvec->bv_page) + bvec->bv_offset, - bvec->bv_len, pos); - kunmap(bvec->bv_page); - cond_resched(); - return bw; + struct bio_vec bvec; + struct req_iterator iter; + int ret = 0; + + rq_for_each_segment(bvec, rq, iter) { + ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); + if (ret < 0) + break; + cond_resched(); + } + + return ret; } -/** - * do_lo_send_write - helper for writing data to a loop device - * +/* * This is the slow, transforming version that needs to double buffer the * data as it cannot do the transformations in place without having direct * access to the destination pages of the backing file. */ -static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec, - loff_t pos, struct page *page) +static int lo_write_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) { - int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page, - bvec->bv_offset, bvec->bv_len, pos >> 9); - if (likely(!ret)) - return __do_lo_send_write(lo->lo_backing_file, - page_address(page), bvec->bv_len, - pos); - printk_ratelimited(KERN_ERR "loop: Transfer error at byte offset %llu, " - "length %i.\n", (unsigned long long)pos, bvec->bv_len); - if (ret > 0) - ret = -EIO; - return ret; -} - -static int lo_send(struct loop_device *lo, struct request *rq, loff_t pos) -{ - int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t, - struct page *page); - struct bio_vec bvec; + struct bio_vec bvec, b; struct req_iterator iter; - struct page *page = NULL; + struct page *page; int ret = 0; - if (lo->transfer != transfer_none) { - page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); - if (unlikely(!page)) - goto fail; - kmap(page); - do_lo_send = do_lo_send_write; - } else { - do_lo_send = do_lo_send_direct_write; - } + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; rq_for_each_segment(bvec, rq, iter) { - ret = do_lo_send(lo, &bvec, pos, page); + ret = lo_do_transfer(lo, WRITE, page, 0, bvec.bv_page, + bvec.bv_offset, bvec.bv_len, pos >> 9); + if (unlikely(ret)) + break; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + ret = lo_write_bvec(lo->lo_backing_file, &b, &pos); if (ret < 0) break; - pos += bvec.bv_len; } - if (page) { - kunmap(page); - __free_page(page); - } -out: + + __free_page(page); return ret; -fail: - printk_ratelimited(KERN_ERR "loop: Failed to allocate temporary page for write.\n"); - ret = -ENOMEM; - goto out; } -struct lo_read_data { - struct loop_device *lo; - struct page *page; - unsigned offset; - int bsize; -}; - -static int -lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - struct lo_read_data *p = sd->u.data; - struct loop_device *lo = p->lo; - struct page *page = buf->page; - sector_t IV; - int size; - - IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) + - (buf->offset >> 9); - size = sd->len; - if (size > p->bsize) - size = p->bsize; - - if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) { - printk_ratelimited(KERN_ERR "loop: transfer error block %ld\n", - page->index); - size = -EINVAL; - } - - flush_dcache_page(p->page); - - if (size > 0) - p->offset += size; - - return size; -} - -static int -lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd) -{ - return __splice_from_pipe(pipe, sd, lo_splice_actor); -} - -static ssize_t -do_lo_receive(struct loop_device *lo, - struct bio_vec *bvec, int bsize, loff_t pos) -{ - struct lo_read_data cookie; - struct splice_desc sd; - struct file *file; - ssize_t retval; - - cookie.lo = lo; - cookie.page = bvec->bv_page; - cookie.offset = bvec->bv_offset; - cookie.bsize = bsize; - - sd.len = 0; - sd.total_len = bvec->bv_len; - sd.flags = 0; - sd.pos = pos; - sd.u.data = &cookie; - - file = lo->lo_backing_file; - retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor); - - return retval; -} - -static int -lo_receive(struct loop_device *lo, struct request *rq, int bsize, loff_t pos) +static int lo_read_simple(struct loop_device *lo, struct request *rq, + loff_t pos) { struct bio_vec bvec; struct req_iterator iter; - ssize_t s; + struct iov_iter i; + ssize_t len; rq_for_each_segment(bvec, rq, iter) { - s = do_lo_receive(lo, &bvec, bsize, pos); - if (s < 0) - return s; + iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) + return len; - if (s != bvec.bv_len) { + flush_dcache_page(bvec.bv_page); + + if (len != bvec.bv_len) { struct bio *bio; __rq_for_each_bio(bio, rq) zero_fill_bio(bio); break; } - pos += bvec.bv_len; + cond_resched(); } + return 0; } +static int lo_read_transfer(struct loop_device *lo, struct request *rq, + loff_t pos) +{ + struct bio_vec bvec, b; + struct req_iterator iter; + struct iov_iter i; + struct page *page; + ssize_t len; + int ret = 0; + + page = alloc_page(GFP_NOIO); + if (unlikely(!page)) + return -ENOMEM; + + rq_for_each_segment(bvec, rq, iter) { + loff_t offset = pos; + + b.bv_page = page; + b.bv_offset = 0; + b.bv_len = bvec.bv_len; + + iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len); + len = vfs_iter_read(lo->lo_backing_file, &i, &pos); + if (len < 0) { + ret = len; + goto out_free_page; + } + + ret = lo_do_transfer(lo, READ, page, 0, bvec.bv_page, + bvec.bv_offset, len, offset >> 9); + if (ret) + goto out_free_page; + + flush_dcache_page(bvec.bv_page); + + if (len != bvec.bv_len) { + struct bio *bio; + + __rq_for_each_bio(bio, rq) + zero_fill_bio(bio); + break; + } + } + + ret = 0; +out_free_page: + __free_page(page); + return ret; +} + static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) { /* @@ -464,10 +403,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) ret = lo_req_flush(lo, rq); else if (rq->cmd_flags & REQ_DISCARD) ret = lo_discard(lo, rq, pos); + else if (lo->transfer) + ret = lo_write_transfer(lo, rq, pos); else - ret = lo_send(lo, rq, pos); - } else - ret = lo_receive(lo, rq, lo->lo_blocksize, pos); + ret = lo_write_simple(lo, rq, pos); + + } else { + if (lo->transfer) + ret = lo_read_transfer(lo, rq, pos); + else + ret = lo_read_simple(lo, rq, pos); + } return ret; } @@ -788,7 +734,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, lo->lo_device = bdev; lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->transfer = transfer_none; + lo->transfer = NULL; lo->ioctl = NULL; lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); @@ -1007,7 +953,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, info->lo_encrypt_key_size); lo->lo_key_owner = uid; - } + } return 0; }