mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 15:41:36 +00:00
netfs: Implement unbuffered/DIO write support
Implement support for unbuffered writes and direct I/O writes. If the write is misaligned with respect to the fscrypt block size, then RMW cycles are performed if necessary. DIO writes are a special case of unbuffered writes with extra restrictions imposed, such as block size alignment requirements. Also provide a field that can tell the code to add some extra space onto the bounce buffer for use by the filesystem in the case of a content-encrypted file. Signed-off-by: David Howells <dhowells@redhat.com> Reviewed-by: Jeff Layton <jlayton@kernel.org> cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org
This commit is contained in:
parent
016dc8516a
commit
153a9961b5
@ -250,7 +250,7 @@ static void afs_apply_status(struct afs_operation *op,
|
|||||||
* what's on the server.
|
* what's on the server.
|
||||||
*/
|
*/
|
||||||
vnode->netfs.remote_i_size = status->size;
|
vnode->netfs.remote_i_size = status->size;
|
||||||
if (change_size) {
|
if (change_size || status->size > i_size_read(inode)) {
|
||||||
afs_set_i_size(vnode, status->size);
|
afs_set_i_size(vnode, status->size);
|
||||||
inode_set_ctime_to_ts(inode, t);
|
inode_set_ctime_to_ts(inode, t);
|
||||||
inode_set_atime_to_ts(inode, t);
|
inode_set_atime_to_ts(inode, t);
|
||||||
|
@ -4,6 +4,7 @@ netfs-y := \
|
|||||||
buffered_read.o \
|
buffered_read.o \
|
||||||
buffered_write.o \
|
buffered_write.o \
|
||||||
direct_read.o \
|
direct_read.o \
|
||||||
|
direct_write.o \
|
||||||
io.o \
|
io.o \
|
||||||
iterator.o \
|
iterator.o \
|
||||||
locking.o \
|
locking.o \
|
||||||
|
166
fs/netfs/direct_write.c
Normal file
166
fs/netfs/direct_write.c
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
/* Unbuffered and direct write support.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
|
||||||
|
* Written by David Howells (dhowells@redhat.com)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <linux/export.h>
|
||||||
|
#include <linux/uio.h>
|
||||||
|
#include "internal.h"
|
||||||
|
|
||||||
|
static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
|
||||||
|
{
|
||||||
|
struct inode *inode = wreq->inode;
|
||||||
|
unsigned long long end = wreq->start + wreq->len;
|
||||||
|
|
||||||
|
if (!wreq->error &&
|
||||||
|
i_size_read(inode) < end) {
|
||||||
|
if (wreq->netfs_ops->update_i_size)
|
||||||
|
wreq->netfs_ops->update_i_size(inode, end);
|
||||||
|
else
|
||||||
|
i_size_write(inode, end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform an unbuffered write where we may have to do an RMW operation on an
|
||||||
|
* encrypted file. This can also be used for direct I/O writes.
|
||||||
|
*/
|
||||||
|
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
struct netfs_group *netfs_group)
|
||||||
|
{
|
||||||
|
struct netfs_io_request *wreq;
|
||||||
|
unsigned long long start = iocb->ki_pos;
|
||||||
|
unsigned long long end = start + iov_iter_count(iter);
|
||||||
|
ssize_t ret, n;
|
||||||
|
bool async = !is_sync_kiocb(iocb);
|
||||||
|
|
||||||
|
_enter("");
|
||||||
|
|
||||||
|
/* We're going to need a bounce buffer if what we transmit is going to
|
||||||
|
* be different in some way to the source buffer, e.g. because it gets
|
||||||
|
* encrypted/compressed or because it needs expanding to a block size.
|
||||||
|
*/
|
||||||
|
// TODO
|
||||||
|
|
||||||
|
_debug("uw %llx-%llx", start, end);
|
||||||
|
|
||||||
|
wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
|
||||||
|
start, end - start,
|
||||||
|
iocb->ki_flags & IOCB_DIRECT ?
|
||||||
|
NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
|
||||||
|
if (IS_ERR(wreq))
|
||||||
|
return PTR_ERR(wreq);
|
||||||
|
|
||||||
|
{
|
||||||
|
/* If this is an async op and we're not using a bounce buffer,
|
||||||
|
* we have to save the source buffer as the iterator is only
|
||||||
|
* good until we return. In such a case, extract an iterator
|
||||||
|
* to represent as much of the the output buffer as we can
|
||||||
|
* manage. Note that the extraction might not be able to
|
||||||
|
* allocate a sufficiently large bvec array and may shorten the
|
||||||
|
* request.
|
||||||
|
*/
|
||||||
|
if (async || user_backed_iter(iter)) {
|
||||||
|
n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
|
||||||
|
if (n < 0) {
|
||||||
|
ret = n;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
|
||||||
|
wreq->direct_bv_count = n;
|
||||||
|
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
|
||||||
|
wreq->len = iov_iter_count(&wreq->iter);
|
||||||
|
} else {
|
||||||
|
wreq->iter = *iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
wreq->io_iter = wreq->iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Copy the data into the bounce buffer and encrypt it. */
|
||||||
|
// TODO
|
||||||
|
|
||||||
|
/* Dispatch the write. */
|
||||||
|
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
|
||||||
|
if (async)
|
||||||
|
wreq->iocb = iocb;
|
||||||
|
wreq->cleanup = netfs_cleanup_dio_write;
|
||||||
|
ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
|
||||||
|
iocb->ki_flags & IOCB_DIRECT ?
|
||||||
|
netfs_write_trace_dio_write :
|
||||||
|
netfs_write_trace_unbuffered_write);
|
||||||
|
if (ret < 0) {
|
||||||
|
_debug("begin = %zd", ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!async) {
|
||||||
|
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
|
||||||
|
wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
|
||||||
|
TASK_UNINTERRUPTIBLE);
|
||||||
|
|
||||||
|
ret = wreq->error;
|
||||||
|
_debug("waited = %zd", ret);
|
||||||
|
if (ret == 0) {
|
||||||
|
ret = wreq->transferred;
|
||||||
|
iocb->ki_pos += ret;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ret = -EIOCBQUEUED;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* netfs_unbuffered_write_iter - Unbuffered write to a file
|
||||||
|
* @iocb: IO state structure
|
||||||
|
* @from: iov_iter with data to write
|
||||||
|
*
|
||||||
|
* Do an unbuffered write to a file, writing the data directly to the server
|
||||||
|
* and not lodging the data in the pagecache.
|
||||||
|
*
|
||||||
|
* Return:
|
||||||
|
* * Negative error code if no data has been written at all of
|
||||||
|
* vfs_fsync_range() failed for a synchronous write
|
||||||
|
* * Number of bytes written, even for truncated writes
|
||||||
|
*/
|
||||||
|
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
|
{
|
||||||
|
struct file *file = iocb->ki_filp;
|
||||||
|
struct inode *inode = file->f_mapping->host;
|
||||||
|
struct netfs_inode *ictx = netfs_inode(inode);
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
|
||||||
|
|
||||||
|
trace_netfs_write_iter(iocb, from);
|
||||||
|
|
||||||
|
ret = netfs_start_io_direct(inode);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
ret = generic_write_checks(iocb, from);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
ret = file_remove_privs(file);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
ret = file_update_time(file);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
|
||||||
|
FSCACHE_INVAL_DIO_WRITE);
|
||||||
|
ret = netfs_unbuffered_write_iter_locked(iocb, from, NULL);
|
||||||
|
out:
|
||||||
|
netfs_end_io_direct(inode);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(netfs_unbuffered_write_iter);
|
@ -26,6 +26,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
|
|||||||
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
|
||||||
size_t offset, size_t len);
|
size_t offset, size_t len);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* direct_write.c
|
||||||
|
*/
|
||||||
|
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
|
struct netfs_group *netfs_group);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* io.c
|
* io.c
|
||||||
*/
|
*/
|
||||||
|
@ -645,7 +645,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
|
|||||||
|
|
||||||
subreq->debug_index = (*_debug_index)++;
|
subreq->debug_index = (*_debug_index)++;
|
||||||
subreq->start = rreq->start + rreq->submitted;
|
subreq->start = rreq->start + rreq->submitted;
|
||||||
subreq->len = rreq->len - rreq->submitted;
|
subreq->len = io_iter->count;
|
||||||
|
|
||||||
_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
|
_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
|
||||||
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
|
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
|
||||||
|
@ -30,7 +30,9 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
|
|||||||
[NETFS_READPAGE] = "RP",
|
[NETFS_READPAGE] = "RP",
|
||||||
[NETFS_READ_FOR_WRITE] = "RW",
|
[NETFS_READ_FOR_WRITE] = "RW",
|
||||||
[NETFS_WRITEBACK] = "WB",
|
[NETFS_WRITEBACK] = "WB",
|
||||||
|
[NETFS_UNBUFFERED_WRITE] = "UW",
|
||||||
[NETFS_DIO_READ] = "DR",
|
[NETFS_DIO_READ] = "DR",
|
||||||
|
[NETFS_DIO_WRITE] = "DW",
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -20,8 +20,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
|
|||||||
struct inode *inode = file ? file_inode(file) : mapping->host;
|
struct inode *inode = file ? file_inode(file) : mapping->host;
|
||||||
struct netfs_inode *ctx = netfs_inode(inode);
|
struct netfs_inode *ctx = netfs_inode(inode);
|
||||||
struct netfs_io_request *rreq;
|
struct netfs_io_request *rreq;
|
||||||
bool is_dio = (origin == NETFS_DIO_READ);
|
bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
|
||||||
bool cached = is_dio && netfs_is_cache_enabled(ctx);
|
origin == NETFS_DIO_READ ||
|
||||||
|
origin == NETFS_DIO_WRITE);
|
||||||
|
bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
|
rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
|
||||||
|
@ -74,11 +74,21 @@ static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async
|
|||||||
{
|
{
|
||||||
struct netfs_io_subrequest *subreq;
|
struct netfs_io_subrequest *subreq;
|
||||||
struct netfs_inode *ctx = netfs_inode(wreq->inode);
|
struct netfs_inode *ctx = netfs_inode(wreq->inode);
|
||||||
|
size_t transferred = 0;
|
||||||
|
|
||||||
_enter("R=%x[]", wreq->debug_id);
|
_enter("R=%x[]", wreq->debug_id);
|
||||||
|
|
||||||
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
|
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
|
||||||
|
|
||||||
|
list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
|
||||||
|
if (subreq->error || subreq->transferred == 0)
|
||||||
|
break;
|
||||||
|
transferred += subreq->transferred;
|
||||||
|
if (subreq->transferred < subreq->len)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
wreq->transferred = transferred;
|
||||||
|
|
||||||
list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
|
list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
|
||||||
if (!subreq->error)
|
if (!subreq->error)
|
||||||
continue;
|
continue;
|
||||||
@ -110,11 +120,28 @@ static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async
|
|||||||
|
|
||||||
wreq->cleanup(wreq);
|
wreq->cleanup(wreq);
|
||||||
|
|
||||||
|
if (wreq->origin == NETFS_DIO_WRITE &&
|
||||||
|
wreq->mapping->nrpages) {
|
||||||
|
pgoff_t first = wreq->start >> PAGE_SHIFT;
|
||||||
|
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
|
||||||
|
invalidate_inode_pages2_range(wreq->mapping, first, last);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wreq->origin == NETFS_DIO_WRITE)
|
||||||
|
inode_dio_end(wreq->inode);
|
||||||
|
|
||||||
_debug("finished");
|
_debug("finished");
|
||||||
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
|
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
|
||||||
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
|
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
|
||||||
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
|
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
|
||||||
|
|
||||||
|
if (wreq->iocb) {
|
||||||
|
wreq->iocb->ki_pos += transferred;
|
||||||
|
if (wreq->iocb->ki_complete)
|
||||||
|
wreq->iocb->ki_complete(
|
||||||
|
wreq->iocb, wreq->error ? wreq->error : transferred);
|
||||||
|
}
|
||||||
|
|
||||||
netfs_clear_subrequests(wreq, was_async);
|
netfs_clear_subrequests(wreq, was_async);
|
||||||
netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
|
netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
|
||||||
}
|
}
|
||||||
@ -329,6 +356,9 @@ int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
|
|||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (wreq->origin == NETFS_DIO_WRITE)
|
||||||
|
inode_dio_begin(wreq->inode);
|
||||||
|
|
||||||
wreq->io_iter = wreq->iter;
|
wreq->io_iter = wreq->iter;
|
||||||
|
|
||||||
/* ->outstanding > 0 carries a ref */
|
/* ->outstanding > 0 carries a ref */
|
||||||
|
@ -138,6 +138,7 @@ struct netfs_inode {
|
|||||||
loff_t remote_i_size; /* Size of the remote file */
|
loff_t remote_i_size; /* Size of the remote file */
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
|
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
|
||||||
|
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -226,7 +227,9 @@ enum netfs_io_origin {
|
|||||||
NETFS_READPAGE, /* This read is a synchronous read */
|
NETFS_READPAGE, /* This read is a synchronous read */
|
||||||
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
|
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
|
||||||
NETFS_WRITEBACK, /* This write was triggered by writepages */
|
NETFS_WRITEBACK, /* This write was triggered by writepages */
|
||||||
|
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
|
||||||
NETFS_DIO_READ, /* This is a direct I/O read */
|
NETFS_DIO_READ, /* This is a direct I/O read */
|
||||||
|
NETFS_DIO_WRITE, /* This is a direct I/O write */
|
||||||
nr__netfs_io_origin
|
nr__netfs_io_origin
|
||||||
} __mode(byte);
|
} __mode(byte);
|
||||||
|
|
||||||
@ -379,6 +382,7 @@ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
|
|||||||
/* High-level write API */
|
/* High-level write API */
|
||||||
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
|
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
|
||||||
struct netfs_group *netfs_group);
|
struct netfs_group *netfs_group);
|
||||||
|
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
|
||||||
|
|
||||||
/* Address operations API */
|
/* Address operations API */
|
||||||
struct readahead_control;
|
struct readahead_control;
|
||||||
|
@ -33,7 +33,9 @@
|
|||||||
EM(NETFS_READPAGE, "RP") \
|
EM(NETFS_READPAGE, "RP") \
|
||||||
EM(NETFS_READ_FOR_WRITE, "RW") \
|
EM(NETFS_READ_FOR_WRITE, "RW") \
|
||||||
EM(NETFS_WRITEBACK, "WB") \
|
EM(NETFS_WRITEBACK, "WB") \
|
||||||
E_(NETFS_DIO_READ, "DR")
|
EM(NETFS_UNBUFFERED_WRITE, "UW") \
|
||||||
|
EM(NETFS_DIO_READ, "DR") \
|
||||||
|
E_(NETFS_DIO_WRITE, "DW")
|
||||||
|
|
||||||
#define netfs_rreq_traces \
|
#define netfs_rreq_traces \
|
||||||
EM(netfs_rreq_trace_assess, "ASSESS ") \
|
EM(netfs_rreq_trace_assess, "ASSESS ") \
|
||||||
|
@ -2706,6 +2706,7 @@ int kiocb_invalidate_pages(struct kiocb *iocb, size_t count)
|
|||||||
return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
|
return invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
|
||||||
end >> PAGE_SHIFT);
|
end >> PAGE_SHIFT);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(kiocb_invalidate_pages);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* generic_file_read_iter - generic filesystem read routine
|
* generic_file_read_iter - generic filesystem read routine
|
||||||
|
Loading…
Reference in New Issue
Block a user