linux/fs/netfs/output.c
David Howells 153a9961b5 netfs: Implement unbuffered/DIO write support
Implement support for unbuffered writes and direct I/O writes.  If the
write is misaligned with respect to the fscrypt block size, then
read-modify-write (RMW) cycles are performed if necessary.  DIO writes are
a special case of unbuffered writes with extra restrictions imposed, such
as block size alignment requirements.

Also provide a field that tells the code to add extra space onto the
bounce buffer for use by the filesystem in the case of a content-encrypted
file.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-cachefs@redhat.com
cc: linux-fsdevel@vger.kernel.org
cc: linux-mm@kvack.org
2023-12-28 09:45:24 +00:00
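
For context, a network filesystem wires this write machinery up from its ->write_iter() method.  The snippet below is a minimal sketch only, assuming the netfs_unbuffered_write_iter() helper added in the same series; the "myfs" function name is hypothetical and the buffered fallback is simplified.

#include <linux/fs.h>
#include <linux/netfs.h>
#include <linux/uio.h>

/* Hypothetical example: route O_DIRECT writes through the netfs DIO path. */
static ssize_t myfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	if (iocb->ki_flags & IOCB_DIRECT)
		return netfs_unbuffered_write_iter(iocb, from);

	return generic_file_write_iter(iocb, from); /* simplified buffered fallback */
}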


// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request this is storing from.
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list on a write
 * request.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source = dest;
		subreq->start = start;
		subreq->len = len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);
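
/*
 * Illustrative sketch, not part of this file: a filesystem's
 * ->create_write_requests() op (invoked from netfs_begin_write() below)
 * might use the helper above to add one upload subrequest covering the span
 * and then punt it to a worker.  The "myfs_*" names are hypothetical; a real
 * implementation may need to split the span to fit its server's write size.
 *
 *	static void myfs_create_write_requests(struct netfs_io_request *wreq,
 *					       loff_t start, size_t len)
 *	{
 *		struct netfs_io_subrequest *subreq;
 *
 *		subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
 *						    start, len, myfs_upload_worker);
 *		if (subreq)
 *			netfs_queue_write_request(subreq);
 *	}
 */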
/*
 * Process a completed write request once all the component operations have
 * been completed.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}
/*
 * Deal with the completion of writing the data to the cache.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}
	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
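
/*
 * Illustrative sketch, not part of this file: the worker attached to an
 * upload subrequest would send the data described by subreq->io_iter to the
 * server and report the outcome through the exported function above.
 * "myfs_send_write_rpc" is a hypothetical helper returning the number of
 * bytes written or a negative error.
 *
 *	static void myfs_upload_worker(struct work_struct *work)
 *	{
 *		struct netfs_io_subrequest *subreq =
 *			container_of(work, struct netfs_io_subrequest, work);
 *		ssize_t ret;
 *
 *		ret = myfs_send_write_rpc(subreq->rreq, subreq->start,
 *					  &subreq->io_iter);
 *		netfs_write_subrequest_terminated(subreq, ret, false);
 *	}
 */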
static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write request for attention
 * @subreq: The write request to be queued
 *
 * Queue the specified write request for processing by a worker thread. We
 * pass the caller's ref on the request to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	cres = &wreq->cache_resources;
	ret = fscache_begin_read_operation(cres, cookie);
	if (ret < 0) {
		netfs_write_subrequest_terminated(subreq, ret, false);
		return;
	}

	ret = cres->ops->prepare_write(cres, &start, &len, i_size_read(wreq->inode),
				       true);
	if (ret < 0) {
		netfs_write_subrequest_terminated(subreq, ret, false);
		return;
	}

	netfs_queue_write_request(subreq);
}
/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present.  The
 * pages must have been marked as writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to
 *         write to that destination.  We may need multiple writes if the
 *         data is not contiguous or the span exceeds wsize for a server.
 */
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going.  We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the server.
	 * Caching of reads is being managed this way also.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}
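
To round out the picture, here is a hedged sketch of how a direct-write entry point might drive netfs_begin_write() and its sync/async completion contract.  It assumes the request has already been set up elsewhere (origin NETFS_DIO_WRITE, wreq->iter describing the user buffer, wreq->iocb set); "myfs_issue_dio_write" is a hypothetical wrapper and the netfs_write_trace_dio_write enumerator name is an assumption, not confirmed by this file.

static ssize_t myfs_issue_dio_write(struct netfs_io_request *wreq, struct kiocb *iocb)
{
	ssize_t ret;

	/* Kick off the writes; only wait for completion if the iocb is synchronous. */
	ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
				netfs_write_trace_dio_write); /* assumed trace label */
	if (ret == -EIOCBQUEUED)
		return ret;	/* async: ->ki_complete() reports the result later */

	/* Synchronous: netfs_begin_write() already waited on NETFS_RREQ_IN_PROGRESS. */
	return ret < 0 ? ret : wreq->transferred;
}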