Merge remote-tracking branch 'dhowells/netfs-lib'

Pick up David Howells' netfs helper library and the new fscache API.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>

commit 8b01888992
@@ -53,6 +53,7 @@ filesystem implementations.

    journalling
    fscrypt
    fsverity
+   netfs_library

 Filesystems
 ===========
Documentation/filesystems/netfs_library.rst (new file, 526 lines)

@@ -0,0 +1,526 @@
.. SPDX-License-Identifier: GPL-2.0

=================================
NETWORK FILESYSTEM HELPER LIBRARY
=================================

.. Contents:

 - Overview.
 - Buffered read helpers.
   - Read helper functions.
   - Read helper structures.
   - Read helper operations.
   - Read helper procedure.
   - Read helper cache API.


Overview
========
The network filesystem helper library is a set of functions designed to aid a
network filesystem in implementing VM/VFS operations.  For the moment, that
just includes turning various VM buffered read operations into requests to
read from the server.  The helper library, however, can also interpose other
services, such as local caching or local data encryption.

Note that the library module doesn't link against local caching directly, so
access must be provided by the netfs.


Buffered Read Helpers
=====================
The library provides a set of read helpers that handle the ->readpage(),
->readahead() and much of the ->write_begin() VM operations and translate
them into a common call framework.

The following services are provided:

 * Handles transparent huge pages (THPs).

 * Insulates the netfs from VM interface changes.

 * Allows the netfs to arbitrarily split reads up into pieces, even ones that
   don't match page sizes or page alignments and that may cross pages.

 * Allows the netfs to expand a readahead request in both directions to meet
   its needs.

 * Allows the netfs to partially fulfil a read, which will then be
   resubmitted.

 * Handles local caching, allowing cached data and server-read data to be
   interleaved for a single request.

 * Handles clearing of parts of the buffer that aren't on the server.

 * Handles retrying of reads that failed, switching reads from the cache to
   the server as necessary.

 * In the future, this is a place that other services can be performed, such
   as local encryption of data to be stored remotely or in the cache.

From the network filesystem, the helpers require a table of operations.  This
includes a mandatory method to issue a read operation along with a number of
optional methods.
Read Helper Functions
---------------------

Three read helpers are provided::

 * void netfs_readahead(struct readahead_control *ractl,
			const struct netfs_read_request_ops *ops,
			void *netfs_priv);
 * int netfs_readpage(struct file *file,
		      struct page *page,
		      const struct netfs_read_request_ops *ops,
		      void *netfs_priv);
 * int netfs_write_begin(struct file *file,
			 struct address_space *mapping,
			 loff_t pos,
			 unsigned int len,
			 unsigned int flags,
			 struct page **_page,
			 void **_fsdata,
			 const struct netfs_read_request_ops *ops,
			 void *netfs_priv);

Each corresponds to a VM operation, with the addition of a couple of
parameters for the use of the read helpers:

 * ``ops``

   A table of operations through which the helpers can talk to the
   filesystem.

 * ``netfs_priv``

   Filesystem private data (can be NULL).

Both of these values will be stored into the read request structure.
For ->readahead() and ->readpage(), the network filesystem should just jump
into the corresponding read helper; whereas for ->write_begin(), it may be a
little more complicated as the network filesystem might want to flush
conflicting writes or track dirty data and needs to put the acquired page if
an error occurs after calling the helper.

The helpers manage the read request, calling back into the network filesystem
through the supplied table of operations.  Waits will be performed as
necessary before returning for helpers that are meant to be synchronous.

If an error occurs and netfs_priv is non-NULL, ops->cleanup() will be called
to deal with it.  If some parts of the request are in progress when an error
occurs, the request will get partially completed if sufficient data is read.

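As an illustration only - a minimal sketch, not drawn from any real
filesystem - a netfs might wire these operations up as follows, where the
``myfs_*`` names are hypothetical and only the netfs_*() calls come from the
helper library::

	static int myfs_readpage(struct file *file, struct page *page)
	{
		return netfs_readpage(file, page, &myfs_req_ops, NULL);
	}

	static int myfs_write_begin(struct file *file,
				    struct address_space *mapping,
				    loff_t pos, unsigned int len,
				    unsigned int flags,
				    struct page **pagep, void **fsdata)
	{
		/* The helper reads or clears the page as needed; if an error
		 * occurs after this returns, the netfs must put the acquired
		 * page itself.
		 */
		return netfs_write_begin(file, mapping, pos, len, flags,
					 pagep, fsdata, &myfs_req_ops, NULL);
	}
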
Additionally, there is::

 * void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
				ssize_t transferred_or_error,
				bool was_async);

which should be called to complete a read subrequest.  This is given the
number of bytes transferred or a negative error code, plus a flag indicating
whether the operation was asynchronous (ie. whether the follow-on processing
can be done in the current context, given this may involve sleeping).

Read Helper Structures
----------------------

The read helpers make use of a couple of structures to maintain the state of
the read.  The first is a structure that manages a read request as a whole::

	struct netfs_read_request {
		struct inode		*inode;
		struct address_space	*mapping;
		struct netfs_cache_resources cache_resources;
		void			*netfs_priv;
		loff_t			start;
		size_t			len;
		loff_t			i_size;
		const struct netfs_read_request_ops *netfs_ops;
		unsigned int		debug_id;
		...
	};

The above fields are the ones the netfs can use.  They are:
 * ``inode``
 * ``mapping``

   The inode and the address space of the file being read from.  The mapping
   may or may not point to inode->i_data.

 * ``cache_resources``

   Resources for the local cache to use, if present.

 * ``netfs_priv``

   The network filesystem's private data.  The value for this can be passed
   in to the helper functions or set during the request.  The ->cleanup() op
   will be called if this is non-NULL at the end.

 * ``start``
 * ``len``

   The file position of the start of the read request and the length.  These
   may be altered by the ->expand_readahead() op.

 * ``i_size``

   The size of the file at the start of the request.

 * ``netfs_ops``

   A pointer to the operation table.  The value for this is passed into the
   helper functions.

 * ``debug_id``

   A number allocated to this operation that can be displayed in trace lines
   for reference.

The second structure is used to manage individual slices of the overall read
request::

	struct netfs_read_subrequest {
		struct netfs_read_request *rreq;
		loff_t			start;
		size_t			len;
		size_t			transferred;
		unsigned long		flags;
		unsigned short		debug_index;
		...
	};

Each subrequest is expected to access a single source, though the helpers
will handle falling back from one source type to another.  The members are:

 * ``rreq``

   A pointer to the read request.

 * ``start``
 * ``len``

   The file position of the start of this slice of the read request and the
   length.

 * ``transferred``

   The amount of data transferred so far of the length of this slice.  The
   network filesystem or cache should start the operation this far into the
   slice.  If a short read occurs, the helpers will call again, having
   updated this to reflect the amount read so far.

 * ``flags``

   Flags pertaining to the read.  There are two of interest to the
   filesystem or cache:

   * ``NETFS_SREQ_CLEAR_TAIL``

     This can be set to indicate that the remainder of the slice, from
     transferred to len, should be cleared.

   * ``NETFS_SREQ_SEEK_DATA_READ``

     This is a hint to the cache that it might want to try skipping ahead to
     the next data (ie. using SEEK_DATA).

 * ``debug_index``

   A number allocated to this slice that can be displayed in trace lines for
   reference.

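As a small illustration of how ->start, ->len and ->transferred combine, a
filesystem might derive the region still to be fetched like this minimal
sketch, where the ``myfs_*`` helper names are hypothetical::

	static inline loff_t myfs_subreq_pos(const struct netfs_read_subrequest *subreq)
	{
		/* The operation resumes this far into the slice. */
		return subreq->start + subreq->transferred;
	}

	static inline size_t myfs_subreq_remaining(const struct netfs_read_subrequest *subreq)
	{
		/* Bytes of the slice still to be read or cleared. */
		return subreq->len - subreq->transferred;
	}
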
Read Helper Operations
----------------------

The network filesystem must provide the read helpers with a table of
operations through which it can issue requests and negotiate::

	struct netfs_read_request_ops {
		void (*init_rreq)(struct netfs_read_request *rreq, struct file *file);
		bool (*is_cache_enabled)(struct inode *inode);
		int (*begin_cache_operation)(struct netfs_read_request *rreq);
		void (*expand_readahead)(struct netfs_read_request *rreq);
		bool (*clamp_length)(struct netfs_read_subrequest *subreq);
		void (*issue_op)(struct netfs_read_subrequest *subreq);
		bool (*is_still_valid)(struct netfs_read_request *rreq);
		int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
					 struct page *page, void **_fsdata);
		void (*done)(struct netfs_read_request *rreq);
		void (*cleanup)(struct address_space *mapping, void *netfs_priv);
	};

The operations are as follows:
 * ``init_rreq()``

   [Optional] This is called to initialise the request structure.  It is
   given the file for reference and can modify the ->netfs_priv value.

 * ``is_cache_enabled()``

   [Required] This is called by netfs_write_begin() to ask if the file is
   being cached.  It should return true if it is being cached and false
   otherwise.

 * ``begin_cache_operation()``

   [Optional] This is called to ask the network filesystem to call into the
   cache (if present) to initialise the caching state for this read.  The
   netfs library module cannot access the cache directly, so the netfs
   should call something like fscache_begin_read_operation() to do this.

   The cache gets to store its state in ->cache_resources and must set a
   table of operations of its own there (though of a different type).

   This should return 0 on success and an error code otherwise.  If an error
   is reported, the operation may proceed anyway, just without local caching
   (only out-of-memory and interruption errors cause failure here).

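   For a filesystem that uses fscache, a minimal sketch of this method might
   look as follows, where myfs_inode() and its ``cookie`` member are
   hypothetical stand-ins for however the netfs stores its cookie::

	static int myfs_begin_cache_operation(struct netfs_read_request *rreq)
	{
		/* Hand off to fscache, attaching cache state to the request. */
		return fscache_begin_read_operation(rreq,
						    myfs_inode(rreq->inode)->cookie);
	}
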
 * ``expand_readahead()``

   [Optional] This is called to allow the filesystem to expand the size of a
   readahead read request.  The filesystem gets to expand the request in
   both directions, though it's not permitted to reduce it as the numbers
   may represent an allocation already made.  If local caching is enabled,
   it gets to expand the request first.

   Expansion is communicated by changing ->start and ->len in the request
   structure.  Note that if any change is made, ->len must be increased by
   at least as much as ->start is reduced.

 * ``clamp_length()``

   [Optional] This is called to allow the filesystem to reduce the size of a
   subrequest.  The filesystem can use this, for example, to chop up a
   request that has to be split across multiple servers or to put multiple
   reads in flight.

   This should return true on success and false on error.

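   A minimal sketch of this method, limiting each slice to a hypothetical
   per-mount rsize (myfs_sb() and its ``rsize`` member are illustrative
   names, not real kernel symbols)::

	static bool myfs_clamp_length(struct netfs_read_subrequest *subreq)
	{
		/* rsize is the assumed maximum wire I/O size per request. */
		size_t rsize = myfs_sb(subreq->rreq->inode->i_sb)->rsize;

		if (subreq->len > rsize)
			subreq->len = rsize;
		return true;
	}
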
 * ``issue_op()``

   [Required] The helpers use this to dispatch a subrequest to the server
   for reading.  In the subrequest, ->start, ->len and ->transferred
   indicate what data should be read from the server.

   There is no return value; the netfs_subreq_terminated() function should
   be called to indicate whether or not the operation succeeded and how much
   data it transferred.  The filesystem also should not deal with setting
   pages uptodate, unlocking them or dropping their refs - the helpers need
   to deal with this as they have to coordinate with copying to the local
   cache.

   Note that the helpers have the pages locked, but not pinned.  It is
   possible to use the ITER_XARRAY iov iterator to refer to the range of the
   inode that is being operated upon without the need to allocate large bvec
   tables.

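   Putting the pieces together, a synchronous implementation might look like
   this sketch; myfs_fetch_from_server() is a hypothetical transport call,
   while iov_iter_xarray() and netfs_subreq_terminated() are the real calls
   referred to above::

	static void myfs_issue_op(struct netfs_read_subrequest *subreq)
	{
		struct netfs_read_request *rreq = subreq->rreq;
		struct iov_iter iter;
		ssize_t ret;

		/* Refer to the relevant slice of the inode's pagecache
		 * without building a bvec table, as suggested above.
		 */
		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
				subreq->start + subreq->transferred,
				subreq->len - subreq->transferred);

		ret = myfs_fetch_from_server(rreq->inode, &iter,
					     subreq->start + subreq->transferred);

		/* Report bytes read or a negative error; this path ran
		 * synchronously, hence was_async = false.
		 */
		netfs_subreq_terminated(subreq, ret, false);
	}
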
 * ``is_still_valid()``

   [Optional] This is called to find out if the data just read from the
   local cache is still valid.  It should return true if it is still valid
   and false if not.  If it's not still valid, it will be reread from the
   server.

 * ``check_write_begin()``

   [Optional] This is called from the netfs_write_begin() helper once it has
   allocated/grabbed the page to be modified to allow the filesystem to
   flush conflicting state before allowing it to be modified.

   It should return 0 if everything is now fine, -EAGAIN if the page should
   be regrabbed and any other error code to abort the operation.

 * ``done()``

   [Optional] This is called after the pages in the request have all been
   unlocked (and marked uptodate if applicable).

 * ``cleanup()``

   [Optional] This is called as the request is being deallocated so that the
   filesystem can clean up ->netfs_priv.

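Tying the hypothetical ``myfs_*`` methods above together, the table itself
might be populated like this sketch; only the mandatory ->issue_op() and the
methods netfs_write_begin() relies on are shown::

	static const struct netfs_read_request_ops myfs_req_ops = {
		/* myfs_is_cache_enabled() is assumed to test whether the
		 * inode has a cache cookie; it isn't sketched above.
		 */
		.is_cache_enabled	= myfs_is_cache_enabled,
		.begin_cache_operation	= myfs_begin_cache_operation,
		.clamp_length		= myfs_clamp_length,
		.issue_op		= myfs_issue_op,
	};
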
Read Helper Procedure
---------------------

The read helpers work by the following general procedure:

 * Set up the request.

 * For readahead, allow the local cache and then the network filesystem to
   propose expansions to the read request.  This is then proposed to the VM.
   If the VM cannot fully perform the expansion, a partially expanded read
   will be performed, though this may not get written to the cache in its
   entirety.

 * Loop around slicing chunks off of the request to form subrequests:

   * If a local cache is present, it gets to do the slicing, otherwise the
     helpers just try to generate maximal slices.

   * The network filesystem gets to clamp the size of each slice if it is to
     be the source.  This allows rsize and chunking to be implemented.

   * The helpers issue a read from the cache or a read from the server or
     just clear the slice as appropriate.

   * The next slice begins at the end of the last one.

   * As slices finish being read, they terminate.

 * When all the subrequests have terminated, the subrequests are assessed
   and any that are short or have failed are reissued:

   * Failed cache requests are issued against the server instead.

   * Failed server requests just fail.

   * Short reads against either source will be reissued against that source
     provided they have transferred some more data:

     * The cache may need to skip holes that it can't do DIO from.

     * If NETFS_SREQ_CLEAR_TAIL was set, a short read will be cleared to the
       end of the slice instead of reissuing.

 * Once the data is read, the pages that have been fully read/cleared:

   * Will be marked uptodate.

   * If a cache is present, will be marked with PG_fscache.

   * Will be unlocked.

 * Any pages that need writing to the cache will then have DIO writes
   issued.

 * Synchronous operations will wait for reading to be complete.

 * Writes to the cache will proceed asynchronously and the pages will have
   the PG_fscache mark removed when that completes.

 * The request structures will be cleaned up when everything has completed.

Read Helper Cache API
---------------------

When implementing a local cache to be used by the read helpers, two things
are required: some way for the network filesystem to initialise the caching
for a read request and a table of operations for the helpers to call.

The network filesystem's ->begin_cache_operation() method is called to set up
a cache and this must call into the cache to do the work.  If using fscache,
for example, the netfs would call::

	int fscache_begin_read_operation(struct netfs_read_request *rreq,
					 struct fscache_cookie *cookie);

passing in the request pointer and the cookie corresponding to the file.

The netfs_read_request object contains a place for the cache to hang its
state::

	struct netfs_cache_resources {
		const struct netfs_cache_ops *ops;
		void *cache_priv;
		void *cache_priv2;
	};

This contains an operations table pointer and two private pointers.  The
operation table looks like the following::

	struct netfs_cache_ops {
		void (*end_operation)(struct netfs_cache_resources *cres);

		void (*expand_readahead)(struct netfs_cache_resources *cres,
					 loff_t *_start, size_t *_len, loff_t i_size);

		enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq,
						       loff_t i_size);

		int (*read)(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    bool seek_data,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv);

		int (*write)(struct netfs_cache_resources *cres,
			     loff_t start_pos,
			     struct iov_iter *iter,
			     netfs_io_terminated_t term_func,
			     void *term_func_priv);
	};

With a termination handler function pointer::

	typedef void (*netfs_io_terminated_t)(void *priv,
					      ssize_t transferred_or_error,
					      bool was_async);

The methods defined in the table are:
 * ``end_operation()``

   [Required] Called to clean up the resources at the end of the read
   request.

 * ``expand_readahead()``

   [Optional] Called at the beginning of a netfs_readahead() operation to
   allow the cache to expand a request in either direction.  This allows the
   cache to size the request appropriately for the cache granularity.

   The function is passed pointers to the start and length in its
   parameters, plus the size of the file for reference, and adjusts the
   start and length appropriately.

 * ``prepare_read()``

   [Required] Called to configure the next slice of a request.  ->start and
   ->len in the subrequest indicate where and how big the next slice can be;
   the cache gets to reduce the length to match its granularity
   requirements.  It should return one of:

   * ``NETFS_FILL_WITH_ZEROES``
   * ``NETFS_DOWNLOAD_FROM_SERVER``
   * ``NETFS_READ_FROM_CACHE``
   * ``NETFS_INVALID_READ``

   to indicate whether the slice should just be cleared or whether it should
   be downloaded from the server or read from the cache - or whether slicing
   should be given up at the current point.

 * ``read()``

   [Required] Called to read from the cache.  The start file offset is given
   along with an iterator to read to, which gives the length also.  It can
   be given a hint requesting that it seek forward from that start position
   for data.

   Also provided is a pointer to a termination handler function and private
   data to pass to that function.  The termination function should be called
   with the number of bytes transferred or an error code, plus a flag
   indicating whether the termination is definitely happening in the
   caller's context.

 * ``write()``

   [Required] Called to write to the cache.  The start file offset is given
   along with an iterator to write from, which gives the length also.

   Also provided is a pointer to a termination handler function and private
   data to pass to that function.  The termination function should be called
   with the number of bytes transferred or an error code, plus a flag
   indicating whether the termination is definitely happening in the
   caller's context.

Note that these methods are passed a pointer to the cache resource
structure, not the read request structure, as they could also be used in
situations where there isn't a read request, such as writing dirty data to
the cache.
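
To see how the termination handler is meant to be used, consider this sketch
of invoking the cache's ->read() method; it mirrors what the read helpers do
internally, and the ``demo_*`` names are invented purely for illustration::

	static void demo_read_done(void *priv, ssize_t transferred_or_error,
				   bool was_async)
	{
		/* Negative values are errors; otherwise a byte count. */
		if (transferred_or_error < 0)
			pr_warn("cache read failed: %zd\n",
				transferred_or_error);
	}

	static int demo_read_from_cache(struct netfs_cache_resources *cres,
					loff_t pos, struct iov_iter *iter)
	{
		/* No SEEK_DATA hint; completion is reported via the
		 * handler, possibly asynchronously.
		 */
		return cres->ops->read(cres, pos, iter, false,
				       demo_read_done, NULL);
	}
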
@@ -125,6 +125,7 @@ source "fs/overlayfs/Kconfig"

 menu "Caches"

+source "fs/netfs/Kconfig"
 source "fs/fscache/Kconfig"
 source "fs/cachefiles/Kconfig"
@@ -67,6 +67,7 @@ obj-y				+= devpts/
 obj-$(CONFIG_DLM)		+= dlm/

 # Do not add any filesystems before this line
+obj-$(CONFIG_NETFS_SUPPORT)	+= netfs/
 obj-$(CONFIG_FSCACHE)		+= fscache/
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT4_FS)		+= ext4/
@@ -7,6 +7,7 @@ cachefiles-y := \
	bind.o \
	daemon.o \
	interface.o \
+	io.o \
	key.o \
	main.o \
	namei.o \
@@ -319,8 +319,8 @@ static void cachefiles_drop_object(struct fscache_object *_object)
 /*
  * dispose of a reference to an object
  */
-static void cachefiles_put_object(struct fscache_object *_object,
-				  enum fscache_obj_ref_trace why)
+void cachefiles_put_object(struct fscache_object *_object,
+			   enum fscache_obj_ref_trace why)
 {
	struct cachefiles_object *object;
	struct fscache_cache *cache;
@@ -568,4 +568,5 @@ const struct fscache_cache_ops cachefiles_cache_ops = {
	.uncache_page		= cachefiles_uncache_page,
	.dissociate_pages	= cachefiles_dissociate_pages,
	.check_consistency	= cachefiles_check_consistency,
+	.begin_read_operation	= cachefiles_begin_read_operation,
 };
@@ -150,6 +150,9 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache,
  */
 extern const struct fscache_cache_ops cachefiles_cache_ops;

+void cachefiles_put_object(struct fscache_object *_object,
+			   enum fscache_obj_ref_trace why);
+
 /*
  * key.c
  */
@@ -217,6 +220,12 @@ extern int cachefiles_allocate_pages(struct fscache_retrieval *,
 extern int cachefiles_write_page(struct fscache_storage *, struct page *);
 extern void cachefiles_uncache_page(struct fscache_object *, struct page *);

+/*
+ * rdwr2.c
+ */
+extern int cachefiles_begin_read_operation(struct netfs_read_request *,
+					   struct fscache_retrieval *);
+
 /*
  * security.c
  */
fs/cachefiles/io.c (new file, 420 lines)

@@ -0,0 +1,420 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/netfs.h>
#include "internal.h"

struct cachefiles_kiocb {
	struct kiocb		iocb;
	refcount_t		ki_refcnt;
	loff_t			start;
	union {
		size_t		skipped;
		size_t		len;
	};
	netfs_io_terminated_t	term_func;
	void			*term_func_priv;
	bool			was_async;
};

static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
{
	if (refcount_dec_and_test(&ki->ki_refcnt)) {
		fput(ki->iocb.ki_filp);
		kfree(ki);
	}
}

/*
 * Handle completion of a read from the cache.
 */
static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);

	_enter("%ld,%ld", ret, ret2);

	if (ki->term_func) {
		if (ret >= 0)
			ret += ki->skipped;
		ki->term_func(ki->term_func_priv, ret, ki->was_async);
	}

	cachefiles_put_kiocb(ki);
}

/*
 * Initiate a read from the cache.
 */
static int cachefiles_read(struct netfs_cache_resources *cres,
			   loff_t start_pos,
			   struct iov_iter *iter,
			   bool seek_data,
			   netfs_io_terminated_t term_func,
			   void *term_func_priv)
{
	struct cachefiles_kiocb *ki;
	struct file *file = cres->cache_priv2;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter), skipped = 0;

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file->f_inode));

	/* If the caller asked us to seek for data before doing the read, then
	 * we should do that now.  If we find a gap, we fill it with zeros.
	 */
	if (seek_data) {
		loff_t off = start_pos, off2;

		off2 = vfs_llseek(file, off, SEEK_DATA);
		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
			skipped = 0;
			ret = off2;
			goto presubmission_error;
		}

		if (off2 == -ENXIO || off2 >= start_pos + len) {
			/* The region is beyond the EOF or there's no more data
			 * in the region, so clear the rest of the buffer and
			 * return success.
			 */
			iov_iter_zero(len, iter);
			skipped = len;
			ret = 0;
			goto presubmission_error;
		}

		skipped = off2 - off;
		iov_iter_zero(skipped, iter);
	}

	ret = -ENOBUFS;
	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos + skipped;
	ki->iocb.ki_flags	= IOCB_DIRECT;
	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->skipped		= skipped;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_read_complete;

	get_file(ki->iocb.ki_filp);

	old_nofs = memalloc_nofs_save();
	ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_read_complete(&ki->iocb, ret, 0);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	if (term_func)
		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
	return ret;
}

/*
 * Handle completion of a write to the cache.
 */
static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2)
{
	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
	struct inode *inode = file_inode(ki->iocb.ki_filp);

	_enter("%ld,%ld", ret, ret2);

	/* Tell lockdep we inherited freeze protection from submission thread */
	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
	__sb_end_write(inode->i_sb, SB_FREEZE_WRITE);

	if (ki->term_func)
		ki->term_func(ki->term_func_priv, ret, ki->was_async);

	cachefiles_put_kiocb(ki);
}

/*
 * Initiate a write to the cache.
 */
static int cachefiles_write(struct netfs_cache_resources *cres,
			    loff_t start_pos,
			    struct iov_iter *iter,
			    netfs_io_terminated_t term_func,
			    void *term_func_priv)
{
	struct cachefiles_kiocb *ki;
	struct inode *inode;
	struct file *file = cres->cache_priv2;
	unsigned int old_nofs;
	ssize_t ret = -ENOBUFS;
	size_t len = iov_iter_count(iter);

	_enter("%pD,%li,%llx,%zx/%llx",
	       file, file_inode(file)->i_ino, start_pos, len,
	       i_size_read(file->f_inode));

	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
	if (!ki)
		goto presubmission_error;

	refcount_set(&ki->ki_refcnt, 2);
	ki->iocb.ki_filp	= file;
	ki->iocb.ki_pos		= start_pos;
	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
	ki->iocb.ki_ioprio	= get_current_ioprio();
	ki->start		= start_pos;
	ki->len			= len;
	ki->term_func		= term_func;
	ki->term_func_priv	= term_func_priv;
	ki->was_async		= true;

	if (ki->term_func)
		ki->iocb.ki_complete = cachefiles_write_complete;

	/* Open-code file_start_write here to grab freeze protection, which
	 * will be released by another thread in aio_complete_rw().  Fool
	 * lockdep by telling it the lock got released so that it doesn't
	 * complain about the held lock when we return to userspace.
	 */
	inode = file_inode(file);
	__sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);

	get_file(ki->iocb.ki_filp);

	old_nofs = memalloc_nofs_save();
	ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
	memalloc_nofs_restore(old_nofs);
	switch (ret) {
	case -EIOCBQUEUED:
		goto in_progress;

	case -ERESTARTSYS:
	case -ERESTARTNOINTR:
	case -ERESTARTNOHAND:
	case -ERESTART_RESTARTBLOCK:
		/* There's no easy way to restart the syscall since other AIO's
		 * may be already running. Just fail this IO with EINTR.
		 */
		ret = -EINTR;
		fallthrough;
	default:
		ki->was_async = false;
		cachefiles_write_complete(&ki->iocb, ret, 0);
		if (ret > 0)
			ret = 0;
		break;
	}

in_progress:
	cachefiles_put_kiocb(ki);
	_leave(" = %zd", ret);
	return ret;

presubmission_error:
	if (term_func)
		term_func(term_func_priv, -ENOMEM, false);
	return -ENOMEM;
}

/*
 * Prepare a read operation, shortening it to a cached/uncached
 * boundary as appropriate.
 */
static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
						      loff_t i_size)
{
	struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv;
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	const struct cred *saved_cred;
	struct file *file = subreq->rreq->cache_resources.cache_priv2;
	loff_t off, to;

	_enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	if (!file)
		goto cache_fail_nosec;

	if (subreq->start >= i_size)
		return NETFS_FILL_WITH_ZEROES;

	cachefiles_begin_secure(cache, &saved_cred);

	off = vfs_llseek(file, subreq->start, SEEK_DATA);
	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
		if (off == (loff_t)-ENXIO)
			goto download_and_store;
		goto cache_fail;
	}

	if (off >= subreq->start + subreq->len)
		goto download_and_store;

	if (off > subreq->start) {
		off = round_up(off, cache->bsize);
		subreq->len = off - subreq->start;
		goto download_and_store;
	}

	to = vfs_llseek(file, subreq->start, SEEK_HOLE);
	if (to < 0 && to >= (loff_t)-MAX_ERRNO)
		goto cache_fail;

	if (to < subreq->start + subreq->len) {
		if (subreq->start + subreq->len >= i_size)
			to = round_up(to, cache->bsize);
		else
			to = round_down(to, cache->bsize);
		subreq->len = to - subreq->start;
	}

	cachefiles_end_secure(cache, saved_cred);
	return NETFS_READ_FROM_CACHE;

download_and_store:
	if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0)
		__set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
cache_fail:
	cachefiles_end_secure(cache, saved_cred);
cache_fail_nosec:
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Prepare for a write to occur.
 */
static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
				    loff_t *_start, size_t *_len, loff_t i_size)
{
	loff_t start = *_start;
	size_t len = *_len, down;

	/* Round to DIO size */
	down = start - round_down(start, PAGE_SIZE);
	*_start = start - down;
	*_len = round_up(down + len, PAGE_SIZE);
	return 0;
}

/*
 * Clean up an operation.
 */
static void cachefiles_end_operation(struct netfs_cache_resources *cres)
{
	struct fscache_retrieval *op = cres->cache_priv;
	struct file *file = cres->cache_priv2;

	_enter("");

	if (file)
		fput(file);
	if (op) {
		fscache_op_complete(&op->op, false);
		fscache_put_retrieval(op);
	}

	_leave("");
}

static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
	.end_operation		= cachefiles_end_operation,
	.read			= cachefiles_read,
	.write			= cachefiles_write,
	.prepare_read		= cachefiles_prepare_read,
	.prepare_write		= cachefiles_prepare_write,
};

/*
 * Open the cache file when beginning a cache operation.
 */
int cachefiles_begin_read_operation(struct netfs_read_request *rreq,
				    struct fscache_retrieval *op)
{
	struct cachefiles_object *object;
	struct cachefiles_cache *cache;
	struct path path;
	struct file *file;

	_enter("");

	object = container_of(op->op.object,
			      struct cachefiles_object, fscache);
	cache = container_of(object->fscache.cache,
			     struct cachefiles_cache, cache);

	path.mnt = cache->mnt;
	path.dentry = object->backer;
	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
				   d_inode(object->backer), cache->cache_cred);
	if (IS_ERR(file))
		return PTR_ERR(file);
	if (!S_ISREG(file_inode(file)->i_mode))
		goto error_file;
	if (unlikely(!file->f_op->read_iter) ||
	    unlikely(!file->f_op->write_iter)) {
		pr_notice("Cache does not support read_iter and write_iter\n");
		goto error_file;
	}

	fscache_get_retrieval(op);
	rreq->cache_resources.cache_priv = op;
	rreq->cache_resources.cache_priv2 = file;
	rreq->cache_resources.ops = &cachefiles_netfs_cache_ops;
	rreq->cookie_debug_id = object->fscache.debug_id;
	_leave("");
	return 0;

error_file:
	fput(file);
	return -EIO;
}
@@ -370,7 +370,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index,
					       unsigned long num_ra_pages)
 {
-	DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
+	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
	struct page *page;

	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;

@@ -4051,7 +4051,7 @@ out:

 static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
 {
-	DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	pgoff_t redirty_idx = page_idx;

@@ -228,7 +228,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
					      pgoff_t index,
					      unsigned long num_ra_pages)
 {
-	DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
+	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
	struct page *page;

	index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
@@ -2,6 +2,7 @@

 config FSCACHE
	tristate "General filesystem local caching manager"
+	select NETFS_SUPPORT
	help
	  This option enables a generic filesystem caching manager that can be
	  used by various network and other filesystems to cache data locally.
@@ -7,6 +7,7 @@ fscache-y := \
	cache.o \
	cookie.o \
	fsdef.o \
+	io.o \
	main.o \
	netfs.o \
	object.o \
@@ -142,6 +142,10 @@ extern int fscache_wait_for_operation_activation(struct fscache_object *,
						 atomic_t *,
						 atomic_t *);
 extern void fscache_invalidate_writes(struct fscache_cookie *);
+struct fscache_retrieval *fscache_alloc_retrieval(struct fscache_cookie *cookie,
+						  struct address_space *mapping,
+						  fscache_rw_complete_t end_io_func,
+						  void *context);

 /*
  * proc.c
fs/fscache/io.c (new file, 116 lines)

@@ -0,0 +1,116 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Cache data I/O routines
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#define FSCACHE_DEBUG_LEVEL PAGE
#include <linux/module.h>
#define FSCACHE_USE_NEW_IO_API
#include <linux/fscache-cache.h>
#include <linux/slab.h>
#include <linux/netfs.h>
#include "internal.h"

/*
 * Start a cache read operation.
 * - we return:
 *   -ENOMEM	- out of memory, some pages may be being read
 *   -ERESTARTSYS - interrupted, some pages may be being read
 *   -ENOBUFS	- no backing object or space available in which to cache any
 *		  pages not being read
 *   -ENODATA	- no data available in the backing object for some or all of
 *		  the pages
 *   0		- dispatched a read on all pages
 */
int __fscache_begin_read_operation(struct netfs_read_request *rreq,
				   struct fscache_cookie *cookie)
{
	struct fscache_retrieval *op;
	struct fscache_object *object;
	bool wake_cookie = false;
	int ret;

	_enter("rr=%08x", rreq->debug_id);

	fscache_stat(&fscache_n_retrievals);

	if (hlist_empty(&cookie->backing_objects))
		goto nobufs;

	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
		_leave(" = -ENOBUFS [invalidating]");
		return -ENOBUFS;
	}

	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);

	if (fscache_wait_for_deferred_lookup(cookie) < 0)
		return -ERESTARTSYS;

	op = fscache_alloc_retrieval(cookie, NULL, NULL, NULL);
	if (!op)
		return -ENOMEM;
	trace_fscache_page_op(cookie, NULL, &op->op, fscache_page_op_retr_multi);

	spin_lock(&cookie->lock);

	if (!fscache_cookie_enabled(cookie) ||
	    hlist_empty(&cookie->backing_objects))
		goto nobufs_unlock;
	object = hlist_entry(cookie->backing_objects.first,
			     struct fscache_object, cookie_link);

	__fscache_use_cookie(cookie);
	atomic_inc(&object->n_reads);
	__set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags);

	if (fscache_submit_op(object, &op->op) < 0)
		goto nobufs_unlock_dec;
	spin_unlock(&cookie->lock);

	fscache_stat(&fscache_n_retrieval_ops);

	/* we wait for the operation to become active, and then process it
	 * *here*, in this thread, and not in the thread pool */
	ret = fscache_wait_for_operation_activation(
		object, &op->op,
		__fscache_stat(&fscache_n_retrieval_op_waits),
		__fscache_stat(&fscache_n_retrievals_object_dead));
	if (ret < 0)
		goto error;

	/* ask the cache to honour the operation */
	ret = object->cache->ops->begin_read_operation(rreq, op);

error:
	if (ret == -ENOMEM)
		fscache_stat(&fscache_n_retrievals_nomem);
	else if (ret == -ERESTARTSYS)
		fscache_stat(&fscache_n_retrievals_intr);
	else if (ret == -ENODATA)
		fscache_stat(&fscache_n_retrievals_nodata);
	else if (ret < 0)
		fscache_stat(&fscache_n_retrievals_nobufs);
	else
		fscache_stat(&fscache_n_retrievals_ok);

	fscache_put_retrieval(op);
	_leave(" = %d", ret);
	return ret;

nobufs_unlock_dec:
	atomic_dec(&object->n_reads);
	wake_cookie = __fscache_unuse_cookie(cookie);
nobufs_unlock:
	spin_unlock(&cookie->lock);
	fscache_put_retrieval(op);
	if (wake_cookie)
		__fscache_wake_unused_cookie(cookie);
nobufs:
	fscache_stat(&fscache_n_retrievals_nobufs);
	_leave(" = -ENOBUFS");
	return -ENOBUFS;
}
EXPORT_SYMBOL(__fscache_begin_read_operation);
@@ -299,7 +299,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op)
 /*
  * allocate a retrieval op
  */
-static struct fscache_retrieval *fscache_alloc_retrieval(
+struct fscache_retrieval *fscache_alloc_retrieval(
	struct fscache_cookie *cookie,
	struct address_space *mapping,
	fscache_rw_complete_t end_io_func,
@@ -278,5 +278,6 @@ int fscache_stats_show(struct seq_file *m, void *v)
		   atomic_read(&fscache_n_cache_stale_objects),
		   atomic_read(&fscache_n_cache_retired_objects),
		   atomic_read(&fscache_n_cache_culled_objects));
+	netfs_stats_show(m);
	return 0;
 }
fs/netfs/Kconfig (new file, 23 lines)

@@ -0,0 +1,23 @@
# SPDX-License-Identifier: GPL-2.0-only

config NETFS_SUPPORT
	tristate "Support for network filesystem high-level I/O"
	help
	  This option enables support for network filesystems, including
	  helpers for high-level buffered I/O, abstracting out read
	  segmentation, local caching and transparent huge page support.

config NETFS_STATS
	bool "Gather statistical information on local caching"
	depends on NETFS_SUPPORT && PROC_FS
	help
	  This option causes statistical information to be gathered on local
	  caching and exported through file:

		/proc/fs/fscache/stats

	  The gathering of statistics adds a certain amount of overhead to
	  execution as there are quite a few stats gathered, and on a
	  multi-CPU system these may be on cachelines that keep bouncing
	  between CPUs.  On the other hand, the stats are very useful for
	  debugging purposes.  Saying 'Y' here is recommended.
fs/netfs/Makefile (new file, 5 lines)

@@ -0,0 +1,5 @@
# SPDX-License-Identifier: GPL-2.0

netfs-y := read_helper.o stats.o

obj-$(CONFIG_NETFS_SUPPORT) := netfs.o
fs/netfs/internal.h (new file, 97 lines)

@@ -0,0 +1,97 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Internal definitions for network filesystem support
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifdef pr_fmt
#undef pr_fmt
#endif

#define pr_fmt(fmt) "netfs: " fmt

/*
 * read_helper.c
 */
extern unsigned int netfs_debug;

/*
 * stats.c
 */
#ifdef CONFIG_NETFS_STATS
extern atomic_t netfs_n_rh_readahead;
extern atomic_t netfs_n_rh_readpage;
extern atomic_t netfs_n_rh_rreq;
extern atomic_t netfs_n_rh_sreq;
extern atomic_t netfs_n_rh_download;
extern atomic_t netfs_n_rh_download_done;
extern atomic_t netfs_n_rh_download_failed;
extern atomic_t netfs_n_rh_download_instead;
extern atomic_t netfs_n_rh_read;
extern atomic_t netfs_n_rh_read_done;
extern atomic_t netfs_n_rh_read_failed;
extern atomic_t netfs_n_rh_zero;
extern atomic_t netfs_n_rh_short_read;
extern atomic_t netfs_n_rh_write;
extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;


static inline void netfs_stat(atomic_t *stat)
{
	atomic_inc(stat);
}

static inline void netfs_stat_d(atomic_t *stat)
{
	atomic_dec(stat);
}

#else
#define netfs_stat(x) do {} while(0)
#define netfs_stat_d(x) do {} while(0)
#endif

/*****************************************************************************/
/*
 * debug tracing
 */
#define dbgprintk(FMT, ...) \
	printk("[%-6.6s] "FMT"\n", current->comm, ##__VA_ARGS__)

#define kenter(FMT, ...) dbgprintk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) dbgprintk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) dbgprintk(FMT, ##__VA_ARGS__)

#ifdef __KDEBUG
#define _enter(FMT, ...) kenter(FMT, ##__VA_ARGS__)
#define _leave(FMT, ...) kleave(FMT, ##__VA_ARGS__)
#define _debug(FMT, ...) kdebug(FMT, ##__VA_ARGS__)

#elif defined(CONFIG_NETFS_DEBUG)
#define _enter(FMT, ...)			\
do {						\
	if (netfs_debug)			\
		kenter(FMT, ##__VA_ARGS__);	\
} while (0)

#define _leave(FMT, ...)			\
do {						\
	if (netfs_debug)			\
		kleave(FMT, ##__VA_ARGS__);	\
} while (0)

#define _debug(FMT, ...)			\
do {						\
	if (netfs_debug)			\
		kdebug(FMT, ##__VA_ARGS__);	\
} while (0)

#else
#define _enter(FMT, ...) no_printk("==> %s("FMT")", __func__, ##__VA_ARGS__)
#define _leave(FMT, ...) no_printk("<== %s()"FMT"", __func__, ##__VA_ARGS__)
#define _debug(FMT, ...) no_printk(FMT, ##__VA_ARGS__)
#endif
fs/netfs/read_helper.c (new file, 1185 lines)

(File diff suppressed because it is too large.)
fs/netfs/stats.c (new file, 59 lines)

@@ -0,0 +1,59 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/* Netfs support statistics
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/netfs.h>
#include "internal.h"

atomic_t netfs_n_rh_readahead;
atomic_t netfs_n_rh_readpage;
atomic_t netfs_n_rh_rreq;
atomic_t netfs_n_rh_sreq;
atomic_t netfs_n_rh_download;
atomic_t netfs_n_rh_download_done;
atomic_t netfs_n_rh_download_failed;
atomic_t netfs_n_rh_download_instead;
atomic_t netfs_n_rh_read;
atomic_t netfs_n_rh_read_done;
atomic_t netfs_n_rh_read_failed;
atomic_t netfs_n_rh_zero;
atomic_t netfs_n_rh_short_read;
atomic_t netfs_n_rh_write;
atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;

void netfs_stats_show(struct seq_file *m)
{
	seq_printf(m, "RdHelp : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
		   atomic_read(&netfs_n_rh_readahead),
		   atomic_read(&netfs_n_rh_readpage),
		   atomic_read(&netfs_n_rh_write_begin),
		   atomic_read(&netfs_n_rh_write_zskip),
		   atomic_read(&netfs_n_rh_rreq),
		   atomic_read(&netfs_n_rh_sreq));
	seq_printf(m, "RdHelp : ZR=%u sh=%u sk=%u\n",
		   atomic_read(&netfs_n_rh_zero),
		   atomic_read(&netfs_n_rh_short_read),
		   atomic_read(&netfs_n_rh_write_zskip));
	seq_printf(m, "RdHelp : DL=%u ds=%u df=%u di=%u\n",
		   atomic_read(&netfs_n_rh_download),
		   atomic_read(&netfs_n_rh_download_done),
		   atomic_read(&netfs_n_rh_download_failed),
		   atomic_read(&netfs_n_rh_download_instead));
	seq_printf(m, "RdHelp : RD=%u rs=%u rf=%u\n",
		   atomic_read(&netfs_n_rh_read),
		   atomic_read(&netfs_n_rh_read_done),
		   atomic_read(&netfs_n_rh_read_failed));
	seq_printf(m, "RdHelp : WR=%u ws=%u wf=%u\n",
		   atomic_read(&netfs_n_rh_write),
		   atomic_read(&netfs_n_rh_write_done),
		   atomic_read(&netfs_n_rh_write_failed));
}
EXPORT_SYMBOL(netfs_stats_show);
@@ -891,18 +891,22 @@ struct fown_struct {
	int signum;		/* posix.1b rt signal to be delivered on IO */
 };

-/*
- * Track a single file's readahead state
+/**
+ * struct file_ra_state - Track a file's readahead state.
+ * @start: Where the most recent readahead started.
+ * @size: Number of pages read in the most recent readahead.
+ * @async_size: Start next readahead when this many pages are left.
+ * @ra_pages: Maximum size of a readahead request.
+ * @mmap_miss: How many mmap accesses missed in the page cache.
+ * @prev_pos: The last byte in the most recent read request.
  */
 struct file_ra_state {
-	pgoff_t start;			/* where readahead started */
-	unsigned int size;		/* # of readahead pages */
-	unsigned int async_size;	/* do asynchronous readahead when
					   there are only # of pages ahead */
-
-	unsigned int ra_pages;		/* Maximum readahead window */
-	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
-	loff_t prev_pos;		/* Cache last read() position */
+	pgoff_t start;
+	unsigned int size;
+	unsigned int async_size;
+	unsigned int ra_pages;
+	unsigned int mmap_miss;
+	loff_t prev_pos;
 };

 /*
@@ -304,6 +304,10 @@ struct fscache_cache_ops {

	/* dissociate a cache from all the pages it was backing */
	void (*dissociate_pages)(struct fscache_cache *cache);
+
+	/* Begin a read operation for the netfs lib */
+	int (*begin_read_operation)(struct netfs_read_request *rreq,
+				    struct fscache_retrieval *op);
 };

 extern struct fscache_cookie fscache_fsdef_index;
@@ -19,6 +19,7 @@
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/list_bl.h>
+#include <linux/netfs.h>

 #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE)
 #define fscache_available() (1)
@@ -29,16 +30,6 @@
 #endif


-/*
- * overload PG_private_2 to give us PG_fscache - this is used to indicate that
- * a page is currently backed by a local disk cache
- */
-#define PageFsCache(page)		PagePrivate2((page))
-#define SetPageFsCache(page)		SetPagePrivate2((page))
-#define ClearPageFsCache(page)		ClearPagePrivate2((page))
-#define TestSetPageFsCache(page)	TestSetPagePrivate2((page))
-#define TestClearPageFsCache(page)	TestClearPagePrivate2((page))
-
 /* pattern used to fill dead space in an index entry */
 #define FSCACHE_INDEX_DEADFILL_PATTERN 0x79
@@ -46,6 +37,7 @@ struct pagevec;
 struct fscache_cache_tag;
 struct fscache_cookie;
 struct fscache_netfs;
+struct netfs_read_request;

 typedef void (*fscache_rw_complete_t)(struct page *page,
				       void *context,
@@ -200,6 +192,10 @@ extern void __fscache_update_cookie(struct fscache_cookie *, const void *);
 extern int __fscache_attr_changed(struct fscache_cookie *);
 extern void __fscache_invalidate(struct fscache_cookie *);
 extern void __fscache_wait_on_invalidate(struct fscache_cookie *);
+
+#ifdef FSCACHE_USE_NEW_IO_API
+extern int __fscache_begin_read_operation(struct netfs_read_request *, struct fscache_cookie *);
+#else
 extern int __fscache_read_or_alloc_page(struct fscache_cookie *,
					struct page *,
					fscache_rw_complete_t,
@@ -223,6 +219,8 @@ extern void __fscache_uncache_all_inode_pages(struct fscache_cookie *,
					      struct inode *);
 extern void __fscache_readpages_cancel(struct fscache_cookie *cookie,
				       struct list_head *pages);
+#endif /* FSCACHE_USE_NEW_IO_API */

 extern void __fscache_disable_cookie(struct fscache_cookie *, const void *, bool);
 extern void __fscache_enable_cookie(struct fscache_cookie *, const void *, loff_t,
				    bool (*)(void *), void *);
@@ -507,6 +505,36 @@ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size)
	return -ENOBUFS;
 }

+#ifdef FSCACHE_USE_NEW_IO_API
+
+/**
+ * fscache_begin_read_operation - Begin a read operation for the netfs lib
+ * @rreq: The read request being undertaken
+ * @cookie: The cookie representing the cache object
+ *
+ * Begin a read operation on behalf of the netfs helper library.  @rreq
+ * indicates the read request to which the operation state should be
+ * attached; @cookie indicates the cache object that will be accessed.
+ *
+ * This is intended to be called from the ->begin_cache_operation() netfs lib
+ * operation as implemented by the network filesystem.
+ *
+ * Returns:
+ * * 0		- Success
+ * * -ENOBUFS	- No caching available
+ * * Other error code from the cache, such as -ENOMEM.
+ */
+static inline
+int fscache_begin_read_operation(struct netfs_read_request *rreq,
+				 struct fscache_cookie *cookie)
+{
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
+		return __fscache_begin_read_operation(rreq, cookie);
+	return -ENOBUFS;
+}
+
+#else /* FSCACHE_USE_NEW_IO_API */
+
 /**
  * fscache_read_or_alloc_page - Read a page from the cache or allocate a block
  * in which to store it
@@ -786,6 +814,8 @@ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
	__fscache_uncache_all_inode_pages(cookie, inode);
 }

+#endif /* FSCACHE_USE_NEW_IO_API */
+
 /**
  * fscache_disable_cookie - Disable a cookie
  * @cookie: The cookie representing the cache object
include/linux/netfs.h (new file, 234 lines)

@@ -0,0 +1,234 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Network filesystem support services.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * See:
 *
 *	Documentation/filesystems/netfs_library.rst
 *
 * for a description of the network filesystem interface declared here.
 */

#ifndef _LINUX_NETFS_H
#define _LINUX_NETFS_H

#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/pagemap.h>

/*
 * Overload PG_private_2 to give us PG_fscache - this is used to indicate that
 * a page is currently backed by a local disk cache
 */
#define PageFsCache(page)		PagePrivate2((page))
#define SetPageFsCache(page)		SetPagePrivate2((page))
#define ClearPageFsCache(page)		ClearPagePrivate2((page))
#define TestSetPageFsCache(page)	TestSetPagePrivate2((page))
#define TestClearPageFsCache(page)	TestClearPagePrivate2((page))

/**
 * set_page_fscache - Set PG_fscache on a page and take a ref
 * @page: The page.
 *
 * Set the PG_fscache (PG_private_2) flag on a page and take the reference
 * needed for the VM to handle its lifetime correctly.  This sets the flag and
 * takes the reference unconditionally, so care must be taken not to set the
 * flag again if it's already set.
 */
static inline void set_page_fscache(struct page *page)
{
	set_page_private_2(page);
}

/**
 * end_page_fscache - Clear PG_fscache and release any waiters
 * @page: The page
 *
 * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers
 * waiting for this.  The page ref held for PG_private_2 being set is released.
 *
 * This is, for example, used when a netfs page is being written to a local
 * disk cache, thereby allowing writes to the cache for the same page to be
 * serialised.
 */
static inline void end_page_fscache(struct page *page)
{
	end_page_private_2(page);
}

/**
 * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page
 * @page: The page to wait on
 *
 * Wait for PG_fscache (aka PG_private_2) to be cleared on a page.
 */
static inline void wait_on_page_fscache(struct page *page)
{
	wait_on_page_private_2(page);
}

/**
 * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page
 * @page: The page to wait on
 *
 * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a
 * fatal signal is received by the calling task.
 *
 * Return:
 * - 0 if successful.
 * - -EINTR if a fatal signal was encountered.
 */
static inline int wait_on_page_fscache_killable(struct page *page)
{
	return wait_on_page_private_2_killable(page);
}
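A sketch of how a filesystem might use these helpers to serialise a buffered
write against an in-flight write of the same page to the local cache (this is
the pattern the wrappers exist for; the function name is hypothetical and the
page is assumed to be locked by the caller):

	/* Wait out any pending write of a locked page to the cache before
	 * letting it be redirtied, so cache writes stay serialised.
	 */
	static int my_flush_fscache(struct page *page)
	{
		if (PageFsCache(page)) {
			int ret = wait_on_page_fscache_killable(page);

			if (ret < 0)
				return ret;	/* fatal signal received */
		}
		return 0;
	}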

enum netfs_read_source {
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_INVALID_READ,
} __mode(byte);

typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
				      bool was_async);

/*
 * Resources required to do operations on a cache.
 */
struct netfs_cache_resources {
	const struct netfs_cache_ops	*ops;
	void				*cache_priv;
	void				*cache_priv2;
};

/*
 * Descriptor for a single component subrequest.
 */
struct netfs_read_subrequest {
	struct netfs_read_request *rreq;	/* Supervising read request */
	struct list_head	rreq_link;	/* Link in rreq->subrequests */
	loff_t			start;		/* Where to start the I/O */
	size_t			len;		/* Size of the I/O */
	size_t			transferred;	/* Amount of data transferred */
	refcount_t		usage;
	short			error;		/* 0 or error that occurred */
	unsigned short		debug_index;	/* Index in list (for debugging output) */
	enum netfs_read_source	source;		/* Where to read from */
	unsigned long		flags;
#define NETFS_SREQ_WRITE_TO_CACHE	0	/* Set if should write to cache */
#define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
#define NETFS_SREQ_SHORT_READ		2	/* Set if there was a short read from the cache */
#define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS		4	/* Set if we didn't manage to read any data */
};

/*
 * Descriptor for a read helper request.  This is used to make multiple I/O
 * requests on a variety of sources and then stitch the result together.
 */
struct netfs_read_request {
	struct work_struct	work;
	struct inode		*inode;		/* The file being accessed */
	struct address_space	*mapping;	/* The mapping being accessed */
	struct netfs_cache_resources cache_resources;
	struct list_head	subrequests;	/* Requests to fetch I/O from disk or net */
	void			*netfs_priv;	/* Private data for the netfs */
	unsigned int		debug_id;
	unsigned int		cookie_debug_id;
	atomic_t		nr_rd_ops;	/* Number of read ops in progress */
	atomic_t		nr_wr_ops;	/* Number of write ops in progress */
	size_t			submitted;	/* Amount submitted for I/O so far */
	size_t			len;		/* Length of the request */
	short			error;		/* 0 or error that occurred */
	loff_t			i_size;		/* Size of the file */
	loff_t			start;		/* Start position */
	pgoff_t			no_unlock_page;	/* Don't unlock this page after read */
	refcount_t		usage;
	unsigned long		flags;
#define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
#define NETFS_RREQ_WRITE_TO_CACHE	1	/* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_PAGE	2	/* Don't unlock no_unlock_page on completion */
#define NETFS_RREQ_DONT_UNLOCK_PAGES	3	/* Don't unlock the pages on completion */
#define NETFS_RREQ_FAILED		4	/* The request failed */
#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
	const struct netfs_read_request_ops *netfs_ops;
};

/*
 * Operations the network filesystem can/must provide to the helpers.
 */
struct netfs_read_request_ops {
	bool (*is_cache_enabled)(struct inode *inode);
	void (*init_rreq)(struct netfs_read_request *rreq, struct file *file);
	int (*begin_cache_operation)(struct netfs_read_request *rreq);
	void (*expand_readahead)(struct netfs_read_request *rreq);
	bool (*clamp_length)(struct netfs_read_subrequest *subreq);
	void (*issue_op)(struct netfs_read_subrequest *subreq);
	bool (*is_still_valid)(struct netfs_read_request *rreq);
	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
				 struct page *page, void **_fsdata);
	void (*done)(struct netfs_read_request *rreq);
	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
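Put together, a filesystem supplies a table like the following (a sketch only;
->issue_op() is the one mandatory method, the rest are optional hooks, and the
my_*() functions are hypothetical):

	static const struct netfs_read_request_ops my_req_ops = {
		.init_rreq		= my_init_rreq,
		.is_cache_enabled	= my_is_cache_enabled,
		.begin_cache_operation	= my_begin_cache_operation,
		.issue_op		= my_issue_op,	/* kick off the read RPC;
							 * must eventually call
							 * netfs_subreq_terminated() */
		.cleanup		= my_cleanup,
	};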

/*
 * Table of operations for access to a cache.  This is obtained by
 * rreq->ops->begin_cache_operation().
 */
struct netfs_cache_ops {
	/* End an operation */
	void (*end_operation)(struct netfs_cache_resources *cres);

	/* Read data from the cache */
	int (*read)(struct netfs_cache_resources *cres,
		    loff_t start_pos,
		    struct iov_iter *iter,
		    bool seek_data,
		    netfs_io_terminated_t term_func,
		    void *term_func_priv);

	/* Write data to the cache */
	int (*write)(struct netfs_cache_resources *cres,
		     loff_t start_pos,
		     struct iov_iter *iter,
		     netfs_io_terminated_t term_func,
		     void *term_func_priv);

	/* Expand readahead request */
	void (*expand_readahead)(struct netfs_cache_resources *cres,
				 loff_t *_start, size_t *_len, loff_t i_size);

	/* Prepare a read operation, shortening it to a cached/uncached
	 * boundary as appropriate.
	 */
	enum netfs_read_source (*prepare_read)(struct netfs_read_subrequest *subreq,
					       loff_t i_size);

	/* Prepare a write operation, working out what part of the write we can
	 * actually do.
	 */
	int (*prepare_write)(struct netfs_cache_resources *cres,
			     loff_t *_start, size_t *_len, loff_t i_size);
};
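A cache backend's ->prepare_read() both clamps the subrequest to a
cached/uncached boundary and names the source the data should come from.  A
sketch of that decision, assuming a hypothetical my_granule_present() lookup
in the cache's own metadata that may also trim subreq->len:

	static enum netfs_read_source
	my_prepare_read(struct netfs_read_subrequest *subreq, loff_t i_size)
	{
		if (subreq->start >= i_size)
			return NETFS_FILL_WITH_ZEROES;	/* beyond EOF */
		if (my_granule_present(subreq->rreq, subreq->start,
				       &subreq->len))
			return NETFS_READ_FROM_CACHE;
		return NETFS_DOWNLOAD_FROM_SERVER;
	}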

struct readahead_control;
extern void netfs_readahead(struct readahead_control *,
			    const struct netfs_read_request_ops *,
			    void *);
extern int netfs_readpage(struct file *,
			  struct page *,
			  const struct netfs_read_request_ops *,
			  void *);
extern int netfs_write_begin(struct file *, struct address_space *,
			     loff_t, unsigned int, unsigned int, struct page **,
			     void **,
			     const struct netfs_read_request_ops *,
			     void *);

extern void netfs_subreq_terminated(struct netfs_read_subrequest *, ssize_t, bool);
extern void netfs_stats_show(struct seq_file *);

#endif /* _LINUX_NETFS_H */
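A sketch of how these entry points get wired into a filesystem's address
space operations (the my_*() wrappers are hypothetical; the NULL argument is
the per-call netfs private pointer, which may be unused):

	static int my_readpage(struct file *file, struct page *page)
	{
		return netfs_readpage(file, page, &my_req_ops, NULL);
	}

	static void my_readahead(struct readahead_control *ractl)
	{
		netfs_readahead(ractl, &my_req_ops, NULL);
	}

	static const struct address_space_operations my_aops = {
		.readpage	= my_readpage,
		.readahead	= my_readahead,
		/* ... write paths may route through netfs_write_begin() ... */
	};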
include/linux/pagemap.h

@@ -688,6 +688,26 @@ void wait_for_stable_page(struct page *page);
 
 void page_endio(struct page *page, bool is_write, int err);
 
+/**
+ * set_page_private_2 - Set PG_private_2 on a page and take a ref
+ * @page: The page.
+ *
+ * Set the PG_private_2 flag on a page and take the reference needed for the VM
+ * to handle its lifetime correctly.  This sets the flag and takes the
+ * reference unconditionally, so care must be taken not to set the flag again
+ * if it's already set.
+ */
+static inline void set_page_private_2(struct page *page)
+{
+	page = compound_head(page);
+	get_page(page);
+	SetPagePrivate2(page);
+}
+
+void end_page_private_2(struct page *page);
+void wait_on_page_private_2(struct page *page);
+int wait_on_page_private_2_killable(struct page *page);
+
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
@@ -792,20 +812,23 @@ static inline int add_to_page_cache(struct page *page,
  * @file: The file, used primarily by network filesystems for authentication.
  *	  May be NULL if invoked internally by the filesystem.
  * @mapping: Readahead this filesystem object.
+ * @ra: File readahead state.  May be NULL.
  */
 struct readahead_control {
	struct file *file;
	struct address_space *mapping;
+	struct file_ra_state *ra;
 /* private: use the readahead_* accessors instead */
	pgoff_t _index;
	unsigned int _nr_pages;
	unsigned int _batch_count;
 };
 
-#define DEFINE_READAHEAD(rac, f, m, i)					\
-	struct readahead_control rac = {				\
+#define DEFINE_READAHEAD(ractl, f, r, m, i)				\
+	struct readahead_control ractl = {				\
		.file = f,						\
		.mapping = m,						\
+		.ra = r,						\
		._index = i,						\
	}
 
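The new @r argument threads the file_ra_state through the control structure,
so callees no longer need it passed separately.  In effect the macro now
expands to the following initializer (shown for local file/mapping/index
variables):

	/* What DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index)
	 * expands to, in effect:
	 */
	struct readahead_control ractl = {
		.file		= file,
		.mapping	= mapping,
		.ra		= &file->f_ra,
		._index		= index,
	};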
@@ -813,10 +836,11 @@ struct readahead_control {
 
 void page_cache_ra_unbounded(struct readahead_control *,
		unsigned long nr_to_read, unsigned long lookahead_count);
-void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
-		unsigned long req_count);
-void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
-		struct page *, unsigned long req_count);
+void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
+void page_cache_async_ra(struct readahead_control *, struct page *,
+		unsigned long req_count);
+void readahead_expand(struct readahead_control *ractl,
+		      loff_t new_start, size_t new_len);
 
 /**
  * page_cache_sync_readahead - generic file readahead
@@ -836,8 +860,8 @@ void page_cache_sync_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file, pgoff_t index,
		unsigned long req_count)
 {
-	DEFINE_READAHEAD(ractl, file, mapping, index);
-	page_cache_sync_ra(&ractl, ra, req_count);
+	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
+	page_cache_sync_ra(&ractl, req_count);
 }
 
 /**
@@ -859,8 +883,8 @@ void page_cache_async_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		struct page *page, pgoff_t index, unsigned long req_count)
 {
-	DEFINE_READAHEAD(ractl, file, mapping, index);
-	page_cache_async_ra(&ractl, ra, page, req_count);
+	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
+	page_cache_async_ra(&ractl, page, req_count);
 }
 
 /**
include/linux/uio.h

@@ -24,6 +24,7 @@ enum iter_type {
	ITER_BVEC = 16,
	ITER_PIPE = 32,
	ITER_DISCARD = 64,
+	ITER_XARRAY = 128,
 };
 
 struct iov_iter {
@@ -39,6 +40,7 @@ struct iov_iter {
			const struct iovec *iov;
			const struct kvec *kvec;
			const struct bio_vec *bvec;
+			struct xarray *xarray;
			struct pipe_inode_info *pipe;
		};
		union {
@@ -47,6 +49,7 @@ struct iov_iter {
				unsigned int head;
				unsigned int start_head;
			};
+			loff_t xarray_start;
		};
	};
 };
@@ -80,6 +83,11 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i)
	return iov_iter_type(i) == ITER_DISCARD;
 }
 
+static inline bool iov_iter_is_xarray(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_XARRAY;
+}
+
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
	return i->type & (READ | WRITE);
@@ -221,6 +229,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_
 void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
			size_t count);
 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
+void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
+		     loff_t start, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
include/trace/events/netfs.h (new file, 261 lines)

@@ -0,0 +1,261 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Network filesystem support module tracepoints
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM netfs

#if !defined(_TRACE_NETFS_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_NETFS_H

#include <linux/tracepoint.h>

/*
 * Define enums for tracing information.
 */
#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY

enum netfs_read_trace {
	netfs_read_trace_expanded,
	netfs_read_trace_readahead,
	netfs_read_trace_readpage,
	netfs_read_trace_write_begin,
};

enum netfs_rreq_trace {
	netfs_rreq_trace_assess,
	netfs_rreq_trace_done,
	netfs_rreq_trace_free,
	netfs_rreq_trace_resubmit,
	netfs_rreq_trace_unlock,
	netfs_rreq_trace_unmark,
	netfs_rreq_trace_write,
};

enum netfs_sreq_trace {
	netfs_sreq_trace_download_instead,
	netfs_sreq_trace_free,
	netfs_sreq_trace_prepare,
	netfs_sreq_trace_resubmit_short,
	netfs_sreq_trace_submit,
	netfs_sreq_trace_terminated,
	netfs_sreq_trace_write,
	netfs_sreq_trace_write_skip,
	netfs_sreq_trace_write_term,
};

enum netfs_failure {
	netfs_fail_check_write_begin,
	netfs_fail_copy_to_cache,
	netfs_fail_read,
	netfs_fail_short_readpage,
	netfs_fail_short_write_begin,
	netfs_fail_prepare_write,
};

#endif

#define netfs_read_traces					\
	EM(netfs_read_trace_expanded,		"EXPANDED ")	\
	EM(netfs_read_trace_readahead,		"READAHEAD")	\
	EM(netfs_read_trace_readpage,		"READPAGE ")	\
	E_(netfs_read_trace_write_begin,	"WRITEBEGN")

#define netfs_rreq_traces					\
	EM(netfs_rreq_trace_assess,		"ASSESS")	\
	EM(netfs_rreq_trace_done,		"DONE  ")	\
	EM(netfs_rreq_trace_free,		"FREE  ")	\
	EM(netfs_rreq_trace_resubmit,		"RESUBM")	\
	EM(netfs_rreq_trace_unlock,		"UNLOCK")	\
	EM(netfs_rreq_trace_unmark,		"UNMARK")	\
	E_(netfs_rreq_trace_write,		"WRITE ")

#define netfs_sreq_sources					\
	EM(NETFS_FILL_WITH_ZEROES,		"ZERO")		\
	EM(NETFS_DOWNLOAD_FROM_SERVER,		"DOWN")		\
	EM(NETFS_READ_FROM_CACHE,		"READ")		\
	E_(NETFS_INVALID_READ,			"INVL")		\

#define netfs_sreq_traces					\
	EM(netfs_sreq_trace_download_instead,	"RDOWN")	\
	EM(netfs_sreq_trace_free,		"FREE ")	\
	EM(netfs_sreq_trace_prepare,		"PREP ")	\
	EM(netfs_sreq_trace_resubmit_short,	"SHORT")	\
	EM(netfs_sreq_trace_submit,		"SUBMT")	\
	EM(netfs_sreq_trace_terminated,		"TERM ")	\
	EM(netfs_sreq_trace_write,		"WRITE")	\
	EM(netfs_sreq_trace_write_skip,		"SKIP ")	\
	E_(netfs_sreq_trace_write_term,		"WTERM")

#define netfs_failures						\
	EM(netfs_fail_check_write_begin,	"check-write-begin") \
	EM(netfs_fail_copy_to_cache,		"copy-to-cache") \
	EM(netfs_fail_read,			"read")		\
	EM(netfs_fail_short_readpage,		"short-readpage") \
	EM(netfs_fail_short_write_begin,	"short-write-begin") \
	E_(netfs_fail_prepare_write,		"prep-write")


/*
 * Export enum symbols via userspace.
 */
#undef EM
#undef E_
#define EM(a, b) TRACE_DEFINE_ENUM(a);
#define E_(a, b) TRACE_DEFINE_ENUM(a);

netfs_read_traces;
netfs_rreq_traces;
netfs_sreq_sources;
netfs_sreq_traces;
netfs_failures;

/*
 * Now redefine the EM() and E_() macros to map the enums to the strings that
 * will be printed in the output.
 */
#undef EM
#undef E_
#define EM(a, b)	{ a, b },
#define E_(a, b)	{ a, b }

TRACE_EVENT(netfs_read,
	    TP_PROTO(struct netfs_read_request *rreq,
		     loff_t start, size_t len,
		     enum netfs_read_trace what),

	    TP_ARGS(rreq, start, len, what),

	    TP_STRUCT__entry(
		    __field(unsigned int,		rreq		)
		    __field(unsigned int,		cookie		)
		    __field(loff_t,			start		)
		    __field(size_t,			len		)
		    __field(enum netfs_read_trace,	what		)
			     ),

	    TP_fast_assign(
		    __entry->rreq	= rreq->debug_id;
		    __entry->cookie	= rreq->cookie_debug_id;
		    __entry->start	= start;
		    __entry->len	= len;
		    __entry->what	= what;
			   ),

	    TP_printk("R=%08x %s c=%08x s=%llx %zx",
		      __entry->rreq,
		      __print_symbolic(__entry->what, netfs_read_traces),
		      __entry->cookie,
		      __entry->start, __entry->len)
	    );

TRACE_EVENT(netfs_rreq,
	    TP_PROTO(struct netfs_read_request *rreq,
		     enum netfs_rreq_trace what),

	    TP_ARGS(rreq, what),

	    TP_STRUCT__entry(
		    __field(unsigned int,		rreq		)
		    __field(unsigned short,		flags		)
		    __field(enum netfs_rreq_trace,	what		)
			     ),

	    TP_fast_assign(
		    __entry->rreq	= rreq->debug_id;
		    __entry->flags	= rreq->flags;
		    __entry->what	= what;
			   ),

	    TP_printk("R=%08x %s f=%02x",
		      __entry->rreq,
		      __print_symbolic(__entry->what, netfs_rreq_traces),
		      __entry->flags)
	    );

TRACE_EVENT(netfs_sreq,
	    TP_PROTO(struct netfs_read_subrequest *sreq,
		     enum netfs_sreq_trace what),

	    TP_ARGS(sreq, what),

	    TP_STRUCT__entry(
		    __field(unsigned int,		rreq		)
		    __field(unsigned short,		index		)
		    __field(short,			error		)
		    __field(unsigned short,		flags		)
		    __field(enum netfs_read_source,	source		)
		    __field(enum netfs_sreq_trace,	what		)
		    __field(size_t,			len		)
		    __field(size_t,			transferred	)
		    __field(loff_t,			start		)
			     ),

	    TP_fast_assign(
		    __entry->rreq	= sreq->rreq->debug_id;
		    __entry->index	= sreq->debug_index;
		    __entry->error	= sreq->error;
		    __entry->flags	= sreq->flags;
		    __entry->source	= sreq->source;
		    __entry->what	= what;
		    __entry->len	= sreq->len;
		    __entry->transferred = sreq->transferred;
		    __entry->start	= sreq->start;
			   ),

	    TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d",
		      __entry->rreq, __entry->index,
		      __print_symbolic(__entry->what, netfs_sreq_traces),
		      __print_symbolic(__entry->source, netfs_sreq_sources),
		      __entry->flags,
		      __entry->start, __entry->transferred, __entry->len,
		      __entry->error)
	    );

TRACE_EVENT(netfs_failure,
	    TP_PROTO(struct netfs_read_request *rreq,
		     struct netfs_read_subrequest *sreq,
		     int error, enum netfs_failure what),

	    TP_ARGS(rreq, sreq, error, what),

	    TP_STRUCT__entry(
		    __field(unsigned int,		rreq		)
		    __field(unsigned short,		index		)
		    __field(short,			error		)
		    __field(unsigned short,		flags		)
		    __field(enum netfs_read_source,	source		)
		    __field(enum netfs_failure,		what		)
		    __field(size_t,			len		)
		    __field(size_t,			transferred	)
		    __field(loff_t,			start		)
			     ),

	    TP_fast_assign(
		    __entry->rreq	= rreq->debug_id;
		    __entry->index	= sreq ? sreq->debug_index : 0;
		    __entry->error	= error;
		    __entry->flags	= sreq ? sreq->flags : 0;
		    __entry->source	= sreq ? sreq->source : NETFS_INVALID_READ;
		    __entry->what	= what;
		    __entry->len	= sreq ? sreq->len : 0;
		    __entry->transferred = sreq ? sreq->transferred : 0;
		    __entry->start	= sreq ? sreq->start : 0;
			   ),

	    TP_printk("R=%08x[%u] %s f=%02x s=%llx %zx/%zx %s e=%d",
		      __entry->rreq, __entry->index,
		      __print_symbolic(__entry->source, netfs_sreq_sources),
		      __entry->flags,
		      __entry->start, __entry->transferred, __entry->len,
		      __print_symbolic(__entry->what, netfs_failures),
		      __entry->error)
	    );

#endif /* _TRACE_NETFS_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
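The helper library fires these events itself; a filesystem does not normally
call them.  A sketch of what a call site inside the library looks like (the
function name here is hypothetical, the tracepoint signatures match the
TP_PROTOs above):

	/* Log a subrequest submission, and a failure if the issue fell over. */
	static void my_log_submit(struct netfs_read_subrequest *subreq, int ret)
	{
		trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
		if (ret < 0)
			trace_netfs_failure(subreq->rreq, subreq, ret,
					    netfs_fail_read);
	}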
lib/iov_iter.c (318 changed lines)
@@ -76,7 +76,44 @@
	}							\
 }
 
-#define iterate_all_kinds(i, n, v, I, B, K) {			\
+#define iterate_xarray(i, n, __v, skip, STEP) {		\
+	struct page *head = NULL;				\
+	size_t wanted = n, seg, offset;				\
+	loff_t start = i->xarray_start + skip;			\
+	pgoff_t index = start >> PAGE_SHIFT;			\
+	int j;							\
+								\
+	XA_STATE(xas, i->xarray, index);			\
+								\
+	rcu_read_lock();					\
+	xas_for_each(&xas, head, ULONG_MAX) {			\
+		if (xas_retry(&xas, head))			\
+			continue;				\
+		if (WARN_ON(xa_is_value(head)))			\
+			break;					\
+		if (WARN_ON(PageHuge(head)))			\
+			break;					\
+		for (j = (head->index < index) ? index - head->index : 0; \
+		     j < thp_nr_pages(head); j++) {		\
+			__v.bv_page = head + j;				\
+			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
+			seg = PAGE_SIZE - offset;			\
+			__v.bv_offset = offset;				\
+			__v.bv_len = min(n, seg);			\
+			(void)(STEP);					\
+			n -= __v.bv_len;				\
+			skip += __v.bv_len;				\
+			if (n == 0)					\
+				break;					\
+		}							\
+		if (n == 0)						\
+			break;						\
+	}							\
+	rcu_read_unlock();					\
+	n = wanted - n;						\
+}
+
+#define iterate_all_kinds(i, n, v, I, B, K, X) {		\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
@@ -88,6 +125,9 @@
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
+		} else if (unlikely(i->type & ITER_XARRAY)) {	\
+			struct bio_vec v;			\
+			iterate_xarray(i, n, v, skip, (X));	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
@@ -96,7 +136,7 @@
	}							\
 }
 
-#define iterate_and_advance(i, n, v, I, B, K) {			\
+#define iterate_and_advance(i, n, v, I, B, K, X) {		\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
@@ -121,6 +161,9 @@
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
+		} else if (unlikely(i->type & ITER_XARRAY)) {	\
+			struct bio_vec v;			\
+			iterate_xarray(i, n, v, skip, (X))	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
@@ -622,7 +665,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
-		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
+		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+		memcpy_to_page(v.bv_page, v.bv_offset,
+			       (from += v.bv_len) - v.bv_len, v.bv_len)
	)
 
	return bytes;
@@ -738,6 +783,18 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
-	})
+	}),
+	({
+		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+				      (from += v.bv_len) - v.bv_len, v.bv_len);
+		if (rem) {
+			curr_addr = (unsigned long) from;
+			bytes = curr_addr - s_addr - rem;
+			rcu_read_unlock();
+			i->iov_offset += bytes;
+			i->count -= bytes;
+			return bytes;
+		}
+	})
	)
 
@@ -759,7 +816,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
	)
 
	return bytes;
@@ -785,7 +844,9 @@ bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
	)
 
	iov_iter_advance(i, bytes);
@@ -805,7 +866,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
	)
 
	return bytes;
@@ -840,7 +903,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
					 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
-			v.iov_len)
+			v.iov_len),
+		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
+					 v.bv_offset, v.bv_len)
	)
 
	return bytes;
@@ -864,7 +929,9 @@ bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
	)
 
	iov_iter_advance(i, bytes);
@@ -901,7 +968,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 {
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
@@ -924,7 +991,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
		WARN_ON(1);
		return 0;
	}
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
@@ -968,7 +1035,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
-		memset(v.iov_base, 0, v.iov_len)
+		memset(v.iov_base, 0, v.iov_len),
+		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
	)
 
	return bytes;
@@ -992,7 +1060,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
-		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
+		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
@@ -1078,11 +1148,17 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
		i->count -= size;
		return;
	}
+	if (unlikely(iov_iter_is_xarray(i))) {
+		size = min(size, i->count);
+		i->iov_offset += size;
+		i->count -= size;
+		return;
+	}
	if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
		return;
	}
-	iterate_and_advance(i, size, v, 0, 0, 0)
+	iterate_and_advance(i, size, v, 0, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
 
@@ -1126,7 +1202,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
		return;
	}
	unroll -= i->iov_offset;
-	if (iov_iter_is_bvec(i)) {
+	if (iov_iter_is_xarray(i)) {
+		BUG(); /* We should never go beyond the start of the specified
+			* range since we might then be straying into pages that
+			* aren't pinned.
+			*/
+	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
@@ -1163,9 +1244,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
-	if (unlikely(iov_iter_is_discard(i)))
+	if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
		return i->count;
-	else if (iov_iter_is_bvec(i))
+	if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
@@ -1213,6 +1294,31 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_pipe);
 
+/**
+ * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @xarray: The xarray to access.
+ * @start: The start file position.
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator to either draw data out of the pages attached to an
+ * inode or to inject data into those pages.  The pages *must* be prevented
+ * from evaporation, either by taking a ref on them or locking them by the
+ * caller.
+ */
+void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
+		     struct xarray *xarray, loff_t start, size_t count)
+{
+	BUG_ON(direction & ~1);
+	i->type = ITER_XARRAY | (direction & (READ | WRITE));
+	i->xarray = xarray;
+	i->xarray_start = start;
+	i->count = count;
+	i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_xarray);
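A sketch of how a caller might drive an ITER_XARRAY iterator over an inode's
pagecache — this mirrors how the netfs helper library implements
NETFS_FILL_WITH_ZEROES by zeroing straight through the iterator (the function
name is hypothetical; the pages in the range must already be pinned or locked,
as the kernel-doc above warns):

	static void my_zero_range(struct address_space *mapping,
				  loff_t start, size_t len)
	{
		struct iov_iter iter;

		/* READ direction: data flows *into* the described buffer,
		 * here the pages attached to mapping->i_pages.
		 */
		iov_iter_xarray(&iter, READ, &mapping->i_pages, start, len);
		iov_iter_zero(len, &iter);
	}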
 
 /**
  * iov_iter_discard - Initialise an I/O iterator that discards data
  * @i: The iterator to initialise.
@@ -1243,10 +1349,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
			return size | i->iov_offset;
		return size;
	}
+	if (unlikely(iov_iter_is_xarray(i)))
+		return (i->xarray_start + i->iov_offset) | i->count;
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
-		res |= (unsigned long)v.iov_base | v.iov_len
+		res |= (unsigned long)v.iov_base | v.iov_len,
+		res |= v.bv_offset | v.bv_len
	)
	return res;
 }
@@ -1268,7 +1377,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
-			(size != v.iov_len ? size : 0))
+			(size != v.iov_len ? size : 0)),
+		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
+			(size != v.bv_len ? size : 0))
	);
	return res;
 }
@@ -1318,6 +1429,75 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
 }
 
+static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
+					  pgoff_t index, unsigned int nr_pages)
+{
+	XA_STATE(xas, xa, index);
+	struct page *page;
+	unsigned int ret = 0;
+
+	rcu_read_lock();
+	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+		if (xas_retry(&xas, page))
+			continue;
+
+		/* Has the page moved or been split? */
+		if (unlikely(page != xas_reload(&xas))) {
+			xas_reset(&xas);
+			continue;
+		}
+
+		pages[ret] = find_subpage(page, xas.xa_index);
+		get_page(pages[ret]);
+		if (++ret == nr_pages)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+static ssize_t iter_xarray_get_pages(struct iov_iter *i,
+				     struct page **pages, size_t maxsize,
+				     unsigned maxpages, size_t *_start_offset)
+{
+	unsigned nr, offset;
+	pgoff_t index, count;
+	size_t size = maxsize, actual;
+	loff_t pos;
+
+	if (!size || !maxpages)
+		return 0;
+
+	pos = i->xarray_start + i->iov_offset;
+	index = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+	*_start_offset = offset;
+
+	count = 1;
+	if (size > PAGE_SIZE - offset) {
+		size -= PAGE_SIZE - offset;
+		count += size >> PAGE_SHIFT;
+		size &= ~PAGE_MASK;
+		if (size)
+			count++;
+	}
+
+	if (count > maxpages)
+		count = maxpages;
+
+	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
+	if (nr == 0)
+		return 0;
+
+	actual = PAGE_SIZE * nr;
+	actual -= offset;
+	if (nr == count && size > 0) {
+		unsigned last_offset = (nr > 1) ? 0 : offset;
+		actual -= PAGE_SIZE - (last_offset + size);
+	}
+	return actual;
+}
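The page-count arithmetic above works in whole pages; a quick worked example,
assuming PAGE_SIZE = 4096:

	/* pos = 0x1300 -> index = 1, offset = 0x300.
	 * size = maxsize = 0x2100 (8448 bytes):
	 *   first page takes PAGE_SIZE - offset = 0xd00 bytes;
	 *     size -= 0xd00              -> size = 0x1400
	 *   count += 0x1400 >> PAGE_SHIFT -> count = 2
	 *   size &= ~PAGE_MASK           -> size = 0x400, non-zero
	 *     so count++                 -> count = 3
	 * i.e. bytes 0x1300..0x3400 span three pages, as expected.
	 */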
 ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
@@ -1327,6 +1507,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
+	if (unlikely(iov_iter_is_xarray(i)))
+		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;
 
@@ -1353,7 +1535,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
		return v.bv_len;
	}),({
		return -EFAULT;
-	})
+	}),
+	0
	)
	return 0;
 }
@@ -1397,6 +1580,51 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
	return n;
 }
 
+static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
+					   struct page ***pages, size_t maxsize,
+					   size_t *_start_offset)
+{
+	struct page **p;
+	unsigned nr, offset;
+	pgoff_t index, count;
+	size_t size = maxsize, actual;
+	loff_t pos;
+
+	if (!size)
+		return 0;
+
+	pos = i->xarray_start + i->iov_offset;
+	index = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+	*_start_offset = offset;
+
+	count = 1;
+	if (size > PAGE_SIZE - offset) {
+		size -= PAGE_SIZE - offset;
+		count += size >> PAGE_SHIFT;
+		size &= ~PAGE_MASK;
+		if (size)
+			count++;
+	}
+
+	p = get_pages_array(count);
+	if (!p)
+		return -ENOMEM;
+	*pages = p;
+
+	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
+	if (nr == 0)
+		return 0;
+
+	actual = PAGE_SIZE * nr;
+	actual -= offset;
+	if (nr == count && size > 0) {
+		unsigned last_offset = (nr > 1) ? 0 : offset;
+		actual -= PAGE_SIZE - (last_offset + size);
+	}
+	return actual;
+}
+
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
@@ -1408,6 +1636,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
+	if (unlikely(iov_iter_is_xarray(i)))
+		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;
 
@@ -1440,7 +1670,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		return v.bv_len;
	}),({
		return -EFAULT;
-	})
+	}), 0
	)
	return 0;
 }
@@ -1478,6 +1708,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
-	})
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
+				      p + v.bv_offset, v.bv_len,
+				      sum, off);
+		kunmap_atomic(p);
+		off += v.bv_len;
+	})
	)
	*csum = sum;
@@ -1519,6 +1756,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
-	})
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
+				      p + v.bv_offset, v.bv_len,
+				      sum, off);
+		kunmap_atomic(p);
+		off += v.bv_len;
+	})
	)
	*csum = sum;
@@ -1565,6 +1809,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
-	})
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		sum = csum_and_memcpy(p + v.bv_offset,
+				      (from += v.bv_len) - v.bv_len,
+				      v.bv_len, sum, off);
+		kunmap_atomic(p);
+		off += v.bv_len;
+	})
	)
	csstate->csum = sum;
@@ -1615,6 +1866,21 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
+	} else if (unlikely(iov_iter_is_xarray(i))) {
+		unsigned offset;
+
+		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
+
+		npages = 1;
+		if (size > PAGE_SIZE - offset) {
+			size -= PAGE_SIZE - offset;
+			npages += size >> PAGE_SHIFT;
+			size &= ~PAGE_MASK;
+			if (size)
+				npages++;
+		}
+		if (npages >= maxpages)
+			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
@@ -1631,7 +1897,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
-	})
+	}),
+	0
	)
	return npages;
 }
@@ -1644,7 +1911,7 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
		WARN_ON(1);
		return NULL;
	}
-	if (unlikely(iov_iter_is_discard(new)))
+	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
@@ -1849,7 +2116,12 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
		kunmap(v.bv_page);
		err;}), ({
		w = v;
-		err = f(&w, context);})
+		err = f(&w, context);}), ({
+		w.iov_base = kmap(v.bv_page) + v.bv_offset;
+		w.iov_len = v.bv_len;
+		err = f(&w, context);
+		kunmap(v.bv_page);
+		err;})
	)
	return err;
 }
mm/filemap.c (65 changed lines)
@@ -1432,6 +1432,67 @@ void unlock_page(struct page *page)
 }
 EXPORT_SYMBOL(unlock_page);
 
+/**
+ * end_page_private_2 - Clear PG_private_2 and release any waiters
+ * @page: The page
+ *
+ * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
+ * this.  The page ref held for PG_private_2 being set is released.
+ *
+ * This is, for example, used when a netfs page is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same page to be
+ * serialised.
+ */
+void end_page_private_2(struct page *page)
+{
+	page = compound_head(page);
+	VM_BUG_ON_PAGE(!PagePrivate2(page), page);
+	clear_bit_unlock(PG_private_2, &page->flags);
+	wake_up_page_bit(page, PG_private_2);
+	put_page(page);
+}
+EXPORT_SYMBOL(end_page_private_2);
+
+/**
+ * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ */
+void wait_on_page_private_2(struct page *page)
+{
+	page = compound_head(page);
+	while (PagePrivate2(page))
+		wait_on_page_bit(page, PG_private_2);
+}
+EXPORT_SYMBOL(wait_on_page_private_2);
+
+/**
+ * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
+ * @page: The page to wait on
+ *
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * fatal signal is received by the calling task.
+ *
+ * Return:
+ * - 0 if successful.
+ * - -EINTR if a fatal signal was encountered.
+ */
+int wait_on_page_private_2_killable(struct page *page)
+{
+	int ret = 0;
+
+	page = compound_head(page);
+	while (PagePrivate2(page)) {
+		ret = wait_on_page_bit_killable(page, PG_private_2);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(wait_on_page_private_2_killable);
+
 /**
  * end_page_writeback - end writeback against a page
  * @page: the page
@@ -2778,7 +2839,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
	struct file *file = vmf->vma->vm_file;
	struct file_ra_state *ra = &file->f_ra;
	struct address_space *mapping = file->f_mapping;
-	DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff);
+	DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
	struct file *fpin = NULL;
	unsigned int mmap_miss;
 
@@ -2790,7 +2851,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 
	if (vmf->vma->vm_flags & VM_SEQ_READ) {
		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
-		page_cache_sync_ra(&ractl, ra, ra->ra_pages);
+		page_cache_sync_ra(&ractl, ra->ra_pages);
		return fpin;
	}
 
mm/internal.h

@@ -51,13 +51,12 @@ void unmap_page_range(struct mmu_gather *tlb,
 
 void do_page_cache_ra(struct readahead_control *, unsigned long nr_to_read,
		unsigned long lookahead_size);
-void force_page_cache_ra(struct readahead_control *, struct file_ra_state *,
-		unsigned long nr);
+void force_page_cache_ra(struct readahead_control *, unsigned long nr);
 static inline void force_page_cache_readahead(struct address_space *mapping,
		struct file *file, pgoff_t index, unsigned long nr_to_read)
 {
-	DEFINE_READAHEAD(ractl, file, mapping, index);
-	force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
+	DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
+	force_page_cache_ra(&ractl, nr_to_read);
 }
 
 unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
mm/readahead.c (101 changed lines)
@@ -198,8 +198,6 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
	for (i = 0; i < nr_to_read; i++) {
		struct page *page = xa_load(&mapping->i_pages, index + i);
 
-		BUG_ON(index + i != ractl->_index + ractl->_nr_pages);
-
		if (page && !xa_is_value(page)) {
			/*
			 * Page already present?  Kick off the current batch
@@ -210,6 +208,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
			 * not worth getting one just for that.
			 */
			read_pages(ractl, &page_pool, true);
+			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}
 
@@ -223,6 +222,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
					gfp_mask) < 0) {
			put_page(page);
			read_pages(ractl, &page_pool, true);
+			i = ractl->_index + ractl->_nr_pages - index - 1;
			continue;
		}
		if (i == nr_to_read - lookahead_size)
@@ -272,9 +272,10 @@ void do_page_cache_ra(struct readahead_control *ractl,
 * memory at once.
 */
 void force_page_cache_ra(struct readahead_control *ractl,
-		struct file_ra_state *ra, unsigned long nr_to_read)
+		unsigned long nr_to_read)
 {
	struct address_space *mapping = ractl->mapping;
+	struct file_ra_state *ra = ractl->ra;
	struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
	unsigned long max_pages, index;
 
@@ -433,10 +434,10 @@ static int try_context_readahead(struct address_space *mapping,
 * A minimal readahead algorithm for trivial sequential/random reads.
 */
 static void ondemand_readahead(struct readahead_control *ractl,
-		struct file_ra_state *ra, bool hit_readahead_marker,
-		unsigned long req_size)
+		bool hit_readahead_marker, unsigned long req_size)
 {
	struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host);
+	struct file_ra_state *ra = ractl->ra;
	unsigned long max_pages = ra->ra_pages;
	unsigned long add_pages;
	unsigned long index = readahead_index(ractl);
@@ -550,7 +551,7 @@ readit:
 }
 
 void page_cache_sync_ra(struct readahead_control *ractl,
-		struct file_ra_state *ra, unsigned long req_count)
+		unsigned long req_count)
 {
	bool do_forced_ra = ractl->file && (ractl->file->f_mode & FMODE_RANDOM);
 
@@ -560,7 +561,7 @@ void page_cache_sync_ra(struct readahead_control *ractl,
	 * read-ahead will do the right thing and limit the read to just the
	 * requested range, which we'll set to 1 page for this case.
	 */
-	if (!ra->ra_pages || blk_cgroup_congested()) {
+	if (!ractl->ra->ra_pages || blk_cgroup_congested()) {
		if (!ractl->file)
			return;
		req_count = 1;
@@ -569,21 +570,20 @@ void page_cache_sync_ra(struct readahead_control *ractl,
 
	/* be dumb */
	if (do_forced_ra) {
-		force_page_cache_ra(ractl, ra, req_count);
+		force_page_cache_ra(ractl, req_count);
		return;
	}
 
	/* do read-ahead */
-	ondemand_readahead(ractl, ra, false, req_count);
+	ondemand_readahead(ractl, false, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_sync_ra);
 
 void page_cache_async_ra(struct readahead_control *ractl,
-		struct file_ra_state *ra, struct page *page,
-		unsigned long req_count)
+		struct page *page, unsigned long req_count)
 {
	/* no read-ahead */
-	if (!ra->ra_pages)
+	if (!ractl->ra->ra_pages)
		return;
 
	/*
@@ -604,7 +604,7 @@ void page_cache_async_ra(struct readahead_control *ractl,
		return;
 
	/* do read-ahead */
-	ondemand_readahead(ractl, ra, true, req_count);
+	ondemand_readahead(ractl, true, req_count);
 }
 EXPORT_SYMBOL_GPL(page_cache_async_ra);
 
@@ -638,3 +638,78 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
 {
	return ksys_readahead(fd, offset, count);
 }
+
+/**
+ * readahead_expand - Expand a readahead request
+ * @ractl: The request to be expanded
+ * @new_start: The revised start
+ * @new_len: The revised size of the request
+ *
+ * Attempt to expand a readahead request outwards from the current size to the
+ * specified size by inserting locked pages before and after the current window
+ * to increase the size to the new window.  This may involve the insertion of
+ * THPs, in which case the window may get expanded even beyond what was
+ * requested.
+ *
+ * The algorithm will stop if it encounters a conflicting page already in the
+ * pagecache and leave a smaller expansion than requested.
+ *
+ * The caller must check for this by examining the revised @ractl object for a
+ * different expansion than was requested.
+ */
+void readahead_expand(struct readahead_control *ractl,
+		      loff_t new_start, size_t new_len)
+{
+	struct address_space *mapping = ractl->mapping;
+	struct file_ra_state *ra = ractl->ra;
+	pgoff_t new_index, new_nr_pages;
+	gfp_t gfp_mask = readahead_gfp_mask(mapping);
+
+	new_index = new_start / PAGE_SIZE;
+
+	/* Expand the leading edge downwards */
+	while (ractl->_index > new_index) {
+		unsigned long index = ractl->_index - 1;
+		struct page *page = xa_load(&mapping->i_pages, index);
+
+		if (page && !xa_is_value(page))
+			return; /* Page apparently present */
+
+		page = __page_cache_alloc(gfp_mask);
+		if (!page)
+			return;
+		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
+			put_page(page);
+			return;
+		}
+
+		ractl->_nr_pages++;
+		ractl->_index = page->index;
+	}
+
+	new_len += new_start - readahead_pos(ractl);
+	new_nr_pages = DIV_ROUND_UP(new_len, PAGE_SIZE);
+
+	/* Expand the trailing edge upwards */
+	while (ractl->_nr_pages < new_nr_pages) {
+		unsigned long index = ractl->_index + ractl->_nr_pages;
+		struct page *page = xa_load(&mapping->i_pages, index);
+
+		if (page && !xa_is_value(page))
+			return; /* Page apparently present */
+
+		page = __page_cache_alloc(gfp_mask);
+		if (!page)
+			return;
+		if (add_to_page_cache_lru(page, mapping, index, gfp_mask) < 0) {
+			put_page(page);
+			return;
+		}
+		ractl->_nr_pages++;
+		if (ra) {
+			ra->size++;
+			ra->async_size++;
+		}
+	}
+}
+EXPORT_SYMBOL(readahead_expand);
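A sketch of how a caller such as the netfs helper library might use this to
round a readahead out to a cache-granule boundary and then read back what was
actually achieved, since the expansion can stop short (the function name is
hypothetical; readahead_pos()/readahead_length() are the accessors from
pagemap.h):

	static void my_expand(struct readahead_control *ractl,
			      loff_t want_start, size_t want_len)
	{
		readahead_expand(ractl, want_start, want_len);

		/* The window may have grown less than asked (a conflicting
		 * page was found) or more (a THP was inserted), so re-read it.
		 */
		pr_debug("ra now %llx-%llx\n",
			 (unsigned long long)readahead_pos(ractl),
			 (unsigned long long)(readahead_pos(ractl) +
					      readahead_length(ractl)));
	}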