gfs2: iomap direct I/O support
The page unmapping previously done in gfs2_direct_IO is now done generically in iomap_dio_rw. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Reviewed-by: Bob Peterson <rpeterso@redhat.com>
This commit is contained in:
parent
bcfe94139a
commit
967bcc91b0
100
fs/gfs2/aops.c
100
fs/gfs2/aops.c
@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
|
|
||||||
struct buffer_head *bh_result, int create)
|
|
||||||
{
|
|
||||||
return gfs2_block_map(inode, lblock, bh_result, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gfs2_writepage_common - Common bits of writepage
|
* gfs2_writepage_common - Common bits of writepage
|
||||||
* @page: The page to be written
|
* @page: The page to be written
|
||||||
@ -1024,96 +1018,6 @@ out:
|
|||||||
try_to_release_page(page, 0);
|
try_to_release_page(page, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* gfs2_ok_for_dio - check that dio is valid on this file
|
|
||||||
* @ip: The inode
|
|
||||||
* @offset: The offset at which we are reading or writing
|
|
||||||
*
|
|
||||||
* Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
|
|
||||||
* 1 (to accept the i/o request)
|
|
||||||
*/
|
|
||||||
static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Should we return an error here? I can't see that O_DIRECT for
|
|
||||||
* a stuffed file makes any sense. For now we'll silently fall
|
|
||||||
* back to buffered I/O
|
|
||||||
*/
|
|
||||||
if (gfs2_is_stuffed(ip))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (offset >= i_size_read(&ip->i_inode))
|
|
||||||
return 0;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
|
|
||||||
{
|
|
||||||
struct file *file = iocb->ki_filp;
|
|
||||||
struct inode *inode = file->f_mapping->host;
|
|
||||||
struct address_space *mapping = inode->i_mapping;
|
|
||||||
struct gfs2_inode *ip = GFS2_I(inode);
|
|
||||||
loff_t offset = iocb->ki_pos;
|
|
||||||
struct gfs2_holder gh;
|
|
||||||
int rv;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Deferred lock, even if it's a write, since we do no allocation
|
|
||||||
* on this path. All we need change is atime, and this lock mode
|
|
||||||
* ensures that other nodes have flushed their buffered read caches
|
|
||||||
* (i.e. their page cache entries for this inode). We do not,
|
|
||||||
* unfortunately have the option of only flushing a range like
|
|
||||||
* the VFS does.
|
|
||||||
*/
|
|
||||||
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
|
|
||||||
rv = gfs2_glock_nq(&gh);
|
|
||||||
if (rv)
|
|
||||||
goto out_uninit;
|
|
||||||
rv = gfs2_ok_for_dio(ip, offset);
|
|
||||||
if (rv != 1)
|
|
||||||
goto out; /* dio not valid, fall back to buffered i/o */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Now since we are holding a deferred (CW) lock at this point, you
|
|
||||||
* might be wondering why this is ever needed. There is a case however
|
|
||||||
* where we've granted a deferred local lock against a cached exclusive
|
|
||||||
* glock. That is ok provided all granted local locks are deferred, but
|
|
||||||
* it also means that it is possible to encounter pages which are
|
|
||||||
* cached and possibly also mapped. So here we check for that and sort
|
|
||||||
* them out ahead of the dio. The glock state machine will take care of
|
|
||||||
* everything else.
|
|
||||||
*
|
|
||||||
* If in fact the cached glock state (gl->gl_state) is deferred (CW) in
|
|
||||||
* the first place, mapping->nrpages will always be zero.
|
|
||||||
*/
|
|
||||||
if (mapping->nrpages) {
|
|
||||||
loff_t lstart = offset & ~(PAGE_SIZE - 1);
|
|
||||||
loff_t len = iov_iter_count(iter);
|
|
||||||
loff_t end = PAGE_ALIGN(offset + len) - 1;
|
|
||||||
|
|
||||||
rv = 0;
|
|
||||||
if (len == 0)
|
|
||||||
goto out;
|
|
||||||
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
|
|
||||||
unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
|
|
||||||
rv = filemap_write_and_wait_range(mapping, lstart, end);
|
|
||||||
if (rv)
|
|
||||||
goto out;
|
|
||||||
if (iov_iter_rw(iter) == WRITE)
|
|
||||||
truncate_inode_pages_range(mapping, lstart, end);
|
|
||||||
}
|
|
||||||
|
|
||||||
rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
|
|
||||||
gfs2_get_block_direct, NULL, NULL, 0);
|
|
||||||
out:
|
|
||||||
gfs2_glock_dq(&gh);
|
|
||||||
out_uninit:
|
|
||||||
gfs2_holder_uninit(&gh);
|
|
||||||
return rv;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gfs2_releasepage - free the metadata associated with a page
|
* gfs2_releasepage - free the metadata associated with a page
|
||||||
* @page: the page that's being released
|
* @page: the page that's being released
|
||||||
@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
|
|||||||
.bmap = gfs2_bmap,
|
.bmap = gfs2_bmap,
|
||||||
.invalidatepage = gfs2_invalidatepage,
|
.invalidatepage = gfs2_invalidatepage,
|
||||||
.releasepage = gfs2_releasepage,
|
.releasepage = gfs2_releasepage,
|
||||||
.direct_IO = gfs2_direct_IO,
|
.direct_IO = noop_direct_IO,
|
||||||
.migratepage = buffer_migrate_page,
|
.migratepage = buffer_migrate_page,
|
||||||
.is_partially_uptodate = block_is_partially_uptodate,
|
.is_partially_uptodate = block_is_partially_uptodate,
|
||||||
.error_remove_page = generic_error_remove_page,
|
.error_remove_page = generic_error_remove_page,
|
||||||
@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
|
|||||||
.bmap = gfs2_bmap,
|
.bmap = gfs2_bmap,
|
||||||
.invalidatepage = gfs2_invalidatepage,
|
.invalidatepage = gfs2_invalidatepage,
|
||||||
.releasepage = gfs2_releasepage,
|
.releasepage = gfs2_releasepage,
|
||||||
.direct_IO = gfs2_direct_IO,
|
.direct_IO = noop_direct_IO,
|
||||||
.migratepage = buffer_migrate_page,
|
.migratepage = buffer_migrate_page,
|
||||||
.is_partially_uptodate = block_is_partially_uptodate,
|
.is_partially_uptodate = block_is_partially_uptodate,
|
||||||
.error_remove_page = generic_error_remove_page,
|
.error_remove_page = generic_error_remove_page,
|
||||||
|
@ -915,6 +915,9 @@ do_alloc:
|
|||||||
} else if (flags & IOMAP_WRITE) {
|
} else if (flags & IOMAP_WRITE) {
|
||||||
u64 alloc_size;
|
u64 alloc_size;
|
||||||
|
|
||||||
|
if (flags & IOMAP_DIRECT)
|
||||||
|
goto out; /* (see gfs2_file_direct_write) */
|
||||||
|
|
||||||
len = gfs2_alloc_size(inode, mp, len);
|
len = gfs2_alloc_size(inode, mp, len);
|
||||||
alloc_size = len << inode->i_blkbits;
|
alloc_size = len << inode->i_blkbits;
|
||||||
if (alloc_size < iomap->length)
|
if (alloc_size < iomap->length)
|
||||||
@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
trace_gfs2_iomap_start(ip, pos, length, flags);
|
trace_gfs2_iomap_start(ip, pos, length, flags);
|
||||||
if (flags & IOMAP_WRITE) {
|
if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
|
||||||
ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
|
ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
|
||||||
} else {
|
} else {
|
||||||
ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
|
ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
|
||||||
release_metapath(&mp);
|
release_metapath(&mp);
|
||||||
|
/*
|
||||||
|
* Silently fall back to buffered I/O for stuffed files or if
|
||||||
|
* we've hit a hole (see gfs2_file_direct_write).
|
||||||
|
*/
|
||||||
|
if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
|
||||||
|
iomap->type != IOMAP_MAPPED)
|
||||||
|
ret = -ENOTBLK;
|
||||||
}
|
}
|
||||||
trace_gfs2_iomap_end(ip, iomap, ret);
|
trace_gfs2_iomap_end(ip, iomap, ret);
|
||||||
return ret;
|
return ret;
|
||||||
@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
|
|||||||
struct gfs2_trans *tr = current->journal_info;
|
struct gfs2_trans *tr = current->journal_info;
|
||||||
struct buffer_head *dibh = iomap->private;
|
struct buffer_head *dibh = iomap->private;
|
||||||
|
|
||||||
if (!(flags & IOMAP_WRITE))
|
if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (iomap->type != IOMAP_INLINE) {
|
if (iomap->type != IOMAP_INLINE) {
|
||||||
|
130
fs/gfs2/file.c
130
fs/gfs2/file.c
@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
|
|||||||
return ret ? ret : ret1;
|
return ret ? ret : ret1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
|
||||||
|
{
|
||||||
|
struct file *file = iocb->ki_filp;
|
||||||
|
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
|
||||||
|
size_t count = iov_iter_count(to);
|
||||||
|
struct gfs2_holder gh;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
if (!count)
|
||||||
|
return 0; /* skip atime */
|
||||||
|
|
||||||
|
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
|
||||||
|
ret = gfs2_glock_nq(&gh);
|
||||||
|
if (ret)
|
||||||
|
goto out_uninit;
|
||||||
|
|
||||||
|
/* fall back to buffered I/O for stuffed files */
|
||||||
|
ret = -ENOTBLK;
|
||||||
|
if (gfs2_is_stuffed(ip))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
|
||||||
|
|
||||||
|
out:
|
||||||
|
gfs2_glock_dq(&gh);
|
||||||
|
out_uninit:
|
||||||
|
gfs2_holder_uninit(&gh);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
|
||||||
|
{
|
||||||
|
struct file *file = iocb->ki_filp;
|
||||||
|
struct inode *inode = file->f_mapping->host;
|
||||||
|
struct gfs2_inode *ip = GFS2_I(inode);
|
||||||
|
size_t len = iov_iter_count(from);
|
||||||
|
loff_t offset = iocb->ki_pos;
|
||||||
|
struct gfs2_holder gh;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Deferred lock, even if it's a write, since we do no allocation on
|
||||||
|
* this path. All we need to change is the atime, and this lock mode
|
||||||
|
* ensures that other nodes have flushed their buffered read caches
|
||||||
|
* (i.e. their page cache entries for this inode). We do not,
|
||||||
|
* unfortunately, have the option of only flushing a range like the
|
||||||
|
* VFS does.
|
||||||
|
*/
|
||||||
|
gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
|
||||||
|
ret = gfs2_glock_nq(&gh);
|
||||||
|
if (ret)
|
||||||
|
goto out_uninit;
|
||||||
|
|
||||||
|
/* Silently fall back to buffered I/O when writing beyond EOF */
|
||||||
|
if (offset + len > i_size_read(&ip->i_inode))
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
|
||||||
|
|
||||||
|
out:
|
||||||
|
gfs2_glock_dq(&gh);
|
||||||
|
out_uninit:
|
||||||
|
gfs2_holder_uninit(&gh);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||||
|
{
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||||
|
ret = gfs2_file_direct_read(iocb, to);
|
||||||
|
if (likely(ret != -ENOTBLK))
|
||||||
|
return ret;
|
||||||
|
iocb->ki_flags &= ~IOCB_DIRECT;
|
||||||
|
}
|
||||||
|
return generic_file_read_iter(iocb, to);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* gfs2_file_write_iter - Perform a write to a file
|
* gfs2_file_write_iter - Perform a write to a file
|
||||||
* @iocb: The io context
|
* @iocb: The io context
|
||||||
@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|||||||
struct file *file = iocb->ki_filp;
|
struct file *file = iocb->ki_filp;
|
||||||
struct inode *inode = file_inode(file);
|
struct inode *inode = file_inode(file);
|
||||||
struct gfs2_inode *ip = GFS2_I(inode);
|
struct gfs2_inode *ip = GFS2_I(inode);
|
||||||
ssize_t ret;
|
ssize_t written = 0, ret;
|
||||||
|
|
||||||
ret = gfs2_rsqa_alloc(ip);
|
ret = gfs2_rsqa_alloc(ip);
|
||||||
if (ret)
|
if (ret)
|
||||||
@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|||||||
gfs2_glock_dq_uninit(&gh);
|
gfs2_glock_dq_uninit(&gh);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (iocb->ki_flags & IOCB_DIRECT)
|
|
||||||
return generic_file_write_iter(iocb, from);
|
|
||||||
|
|
||||||
inode_lock(inode);
|
inode_lock(inode);
|
||||||
ret = generic_write_checks(iocb, from);
|
ret = generic_write_checks(iocb, from);
|
||||||
if (ret <= 0)
|
if (ret <= 0)
|
||||||
@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto out2;
|
goto out2;
|
||||||
|
|
||||||
|
if (iocb->ki_flags & IOCB_DIRECT) {
|
||||||
|
struct address_space *mapping = file->f_mapping;
|
||||||
|
loff_t pos, endbyte;
|
||||||
|
ssize_t buffered;
|
||||||
|
|
||||||
|
written = gfs2_file_direct_write(iocb, from);
|
||||||
|
if (written < 0 || !iov_iter_count(from))
|
||||||
|
goto out2;
|
||||||
|
|
||||||
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
|
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
|
||||||
|
if (unlikely(ret < 0))
|
||||||
|
goto out2;
|
||||||
|
buffered = ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need to ensure that the page cache pages are written to
|
||||||
|
* disk and invalidated to preserve the expected O_DIRECT
|
||||||
|
* semantics.
|
||||||
|
*/
|
||||||
|
pos = iocb->ki_pos;
|
||||||
|
endbyte = pos + buffered - 1;
|
||||||
|
ret = filemap_write_and_wait_range(mapping, pos, endbyte);
|
||||||
|
if (!ret) {
|
||||||
|
iocb->ki_pos += buffered;
|
||||||
|
written += buffered;
|
||||||
|
invalidate_mapping_pages(mapping,
|
||||||
|
pos >> PAGE_SHIFT,
|
||||||
|
endbyte >> PAGE_SHIFT);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* We don't know how much we wrote, so just return
|
||||||
|
* the number of bytes which were direct-written
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
|
||||||
|
if (likely(ret > 0))
|
||||||
|
iocb->ki_pos += ret;
|
||||||
|
}
|
||||||
|
|
||||||
out2:
|
out2:
|
||||||
current->backing_dev_info = NULL;
|
current->backing_dev_info = NULL;
|
||||||
out:
|
out:
|
||||||
inode_unlock(inode);
|
inode_unlock(inode);
|
||||||
if (likely(ret > 0)) {
|
if (likely(ret > 0)) {
|
||||||
iocb->ki_pos += ret;
|
|
||||||
|
|
||||||
/* Handle various SYNC-type writes */
|
/* Handle various SYNC-type writes */
|
||||||
ret = generic_write_sync(iocb, ret);
|
ret = generic_write_sync(iocb, ret);
|
||||||
}
|
}
|
||||||
return ret;
|
return written ? written : ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
|
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
|
||||||
@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
|
|||||||
|
|
||||||
const struct file_operations gfs2_file_fops = {
|
const struct file_operations gfs2_file_fops = {
|
||||||
.llseek = gfs2_llseek,
|
.llseek = gfs2_llseek,
|
||||||
.read_iter = generic_file_read_iter,
|
.read_iter = gfs2_file_read_iter,
|
||||||
.write_iter = gfs2_file_write_iter,
|
.write_iter = gfs2_file_write_iter,
|
||||||
.unlocked_ioctl = gfs2_ioctl,
|
.unlocked_ioctl = gfs2_ioctl,
|
||||||
.mmap = gfs2_mmap,
|
.mmap = gfs2_mmap,
|
||||||
@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = {
|
|||||||
|
|
||||||
const struct file_operations gfs2_file_fops_nolock = {
|
const struct file_operations gfs2_file_fops_nolock = {
|
||||||
.llseek = gfs2_llseek,
|
.llseek = gfs2_llseek,
|
||||||
.read_iter = generic_file_read_iter,
|
.read_iter = gfs2_file_read_iter,
|
||||||
.write_iter = gfs2_file_write_iter,
|
.write_iter = gfs2_file_write_iter,
|
||||||
.unlocked_ioctl = gfs2_ioctl,
|
.unlocked_ioctl = gfs2_ioctl,
|
||||||
.mmap = gfs2_mmap,
|
.mmap = gfs2_mmap,
|
||||||
|
Loading…
Reference in New Issue
Block a user