linux/fs/nfs/file.c
Linus Torvalds 965181d7ef NFS client updates for Linux 5.18
Highlights include:
 
 Features:
 - Switch NFS to use readahead instead of the obsolete readpages.
 - Readdir fixes to improve cacheability of large directories when there
   are multiple readers and writers.
 - Readdir performance improvements when doing a seekdir() immediately
   after opening the directory (common when re-exporting NFS).
 - NFS swap improvements from Neil Brown.
 - Loosen up memory allocation to permit direct reclaim and write back
   in cases where there is no danger of deadlocking the writeback code or
   NFS swap.
 - Avoid sillyrename when the NFSv4 server claims to support the
   necessary features to recover the unlinked but open file after reboot.
 
 Bugfixes:
 - Patch from Olga to add a mount option to control NFSv4.1 session
   trunking discovery, and default it to being off.
 - Fix a lockup in nfs_do_recoalesce().
 - Two fixes for list iterator variables being used when pointing to the
   list head.
 - Fix a kernel memory scribble when reading from a non-socket transport
   in /sys/kernel/sunrpc.
 - Fix a race where reconnecting to a server could leave the TCP socket
   stuck forever in the connecting state.
 - Patch from Neil to fix a shutdown race which can leave the SUNRPC
   transport timer primed after we free the struct xprt itself.
 - Patch from Xin Xiong to fix reference count leaks in the NFSv4.2 copy
   offload.
 - Sunrpc patch from Olga to avoid resending a task on an offlined
   transport.
 
 Cleanups:
 - Patches from Dave Wysochanski to clean up the fscache code
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEESQctxSBg8JpV8KqEZwvnipYKAPIFAmJDLLUACgkQZwvnipYK
 APJZ5Q/9FH5S38RcUOxpFS+XaVdmxltJTMQv9Bz5mcyljq6PwRjTn73Qs7MfBQfD
 k90O9Pn/EXgQpFWNW4o/saQGLAO1MbVaesdNNF92Le9NziMz7lr95fsFXOqpg9EQ
 rEVOImWinv/N0+P0yv5tgjx6Fheu70L4dnwsxepnmtv2R1VjXOVchK7WjsV+9qey
 Xvn15OzhtVCGhRvvKfmGuGW+2I0D29tRNn2X0Pi+LjxAEQ0We/rcsJeGDMGYtbeC
 2U4ADk7KB0kJq04WYCH8k8tTrljL3PUONzO6QP1lltmAZ0FOwXf/AjyaD3SML2Xu
 A1yc2bEn3/S6wCQ3BiMxoE7UCSewg/ZyQD7B9GjZ/nWYNXREhLKvtVZt+ULTCA6o
 UKAI2vlEHljzQI2YfbVVZFyL8KCZqi+BFcJ/H/dtFKNazIkiefkaF+703Pflazis
 HjhA5G6IT0Pk6qZ6OBp+Z/4kCDYZYXXi9ysiNuMPWCiYEfZzt7ocUjm44uRAi+vR
 Qpf0V01Rg4iplTj3YFXqb9TwxrudzC7AcQ7hbLJia0kD/8djG+aGZs3SJWeO+VuE
 Xa41oxAx9jqOSRvSKPvj7nbexrwn8PtQI2NtDRqjPXlg6+Jbk23u8lIEVhsCH877
 xoMUHtTKLyqECskzRN4nh7qoBD3QfiLuI74aDcPtGEuXzU1+Ox8=
 =jeGt
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-5.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Features:

   - Switch NFS to use readahead instead of the obsolete readpages.

   - Readdir fixes to improve cacheability of large directories when
     there are multiple readers and writers.

   - Readdir performance improvements when doing a seekdir() immediately
     after opening the directory (common when re-exporting NFS).

   - NFS swap improvements from Neil Brown.

   - Loosen up memory allocation to permit direct reclaim and write back
     in cases where there is no danger of deadlocking the writeback code
     or NFS swap.

   - Avoid sillyrename when the NFSv4 server claims to support the
     necessary features to recover the unlinked but open file after
     reboot.

  Bugfixes:

   - Patch from Olga to add a mount option to control NFSv4.1 session
     trunking discovery, and default it to being off.

   - Fix a lockup in nfs_do_recoalesce().

   - Two fixes for list iterator variables being used when pointing to
     the list head.

   - Fix a kernel memory scribble when reading from a non-socket
     transport in /sys/kernel/sunrpc.

   - Fix a race where reconnecting to a server could leave the TCP
     socket stuck forever in the connecting state.

   - Patch from Neil to fix a shutdown race which can leave the SUNRPC
     transport timer primed after we free the struct xprt itself.

   - Patch from Xin Xiong to fix reference count leaks in the NFSv4.2
     copy offload.

   - Sunrpc patch from Olga to avoid resending a task on an offlined
     transport.

  Cleanups:

   - Patches from Dave Wysochanski to clean up the fscache code"

* tag 'nfs-for-5.18-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (91 commits)
  NFSv4/pNFS: Fix another issue with a list iterator pointing to the head
  NFS: Don't loop forever in nfs_do_recoalesce()
  SUNRPC: Don't return error values in sysfs read of closed files
  SUNRPC: Do not dereference non-socket transports in sysfs
  NFSv4.1: don't retry BIND_CONN_TO_SESSION on session error
  SUNRPC don't resend a task on an offlined transport
  NFS: replace usage of found with dedicated list iterator variable
  SUNRPC: avoid race between mod_timer() and del_timer_sync()
  pNFS/files: Ensure pNFS allocation modes are consistent with nfsiod
  pNFS/flexfiles: Ensure pNFS allocation modes are consistent with nfsiod
  NFSv4/pnfs: Ensure pNFS allocation modes are consistent with nfsiod
  NFS: Avoid writeback threads getting stuck in mempool_alloc()
  NFS: nfsiod should not block forever in mempool_alloc()
  SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent
  SUNRPC: Fix unx_lookup_cred() allocation
  NFS: Fix memory allocation in rpc_alloc_task()
  NFS: Fix memory allocation in rpc_malloc()
  SUNRPC: Improve accuracy of socket ENOBUFS determination
  SUNRPC: Replace internal use of SOCKWQ_ASYNC_NOSPACE
  SUNRPC: Fix socket waits for write buffer space
  ...
2022-03-29 18:55:37 -07:00

880 lines
22 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/nfs/file.c
*
* Copyright (C) 1992 Rick Sladkey
*
* Changes Copyright (C) 1994 by Florian La Roche
* - Do not copy data too often around in the kernel.
* - In nfs_file_read the return value of kmalloc wasn't checked.
* - Put in a better version of read look-ahead buffering. Original idea
* and implementation by Wai S Kok elekokws@ee.nus.sg.
*
* Expire cache on write to a file by Wai S Kok (Oct 1994).
*
* Total rewrite of read side for new NFS buffer cache.. Linus.
*
* nfs regular file handling functions
*/
#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/swap.h>
#include <linux/uaccess.h>
#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_FILE
static const struct vm_operations_struct nfs_file_vm_ops;
int nfs_check_flags(int flags)
{
if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(nfs_check_flags);
/*
* Open file
*/
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
int res;
dprintk("NFS: open file(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSOPEN);
res = nfs_check_flags(filp->f_flags);
if (res)
return res;
res = nfs_open(inode, filp);
return res;
}
int
nfs_file_release(struct inode *inode, struct file *filp)
{
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
nfs_file_clear_open_context(filp);
nfs_fscache_release_file(inode, filp);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_file_release);
/**
* nfs_revalidate_file_size - Revalidate the file size
* @inode: pointer to inode struct
* @filp: pointer to struct file
*
* Revalidates the file length. This is basically a wrapper around
* nfs_revalidate_inode() that takes into account the fact that we may
* have cached writes (in which case we don't care about the server's
* idea of what the file length is), or O_DIRECT (in which case we
* shouldn't trust the cache).
*/
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
struct nfs_server *server = NFS_SERVER(inode);
if (filp->f_flags & O_DIRECT)
goto force_reval;
if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
goto force_reval;
return 0;
force_reval:
return __nfs_revalidate_inode(server, inode);
}
loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
filp, offset, whence);
/*
* whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
* the cached file length
*/
if (whence != SEEK_SET && whence != SEEK_CUR) {
struct inode *inode = filp->f_mapping->host;
int retval = nfs_revalidate_file_size(inode, filp);
if (retval < 0)
return (loff_t)retval;
}
return generic_file_llseek(filp, offset, whence);
}
EXPORT_SYMBOL_GPL(nfs_file_llseek);
/*
* Flush all dirty pages, and check for write errors.
*/
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file_inode(file);
errseq_t since;
dprintk("NFS: flush(%pD2)\n", file);
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
/* Flush writes to the server and return any errors */
since = filemap_sample_wb_err(file->f_mapping);
nfs_wb_all(inode);
return filemap_check_wb_err(file->f_mapping, since);
}
ssize_t
nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t result;
if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_read(iocb, to, false);
dprintk("NFS: read(%pD2, %zu@%lu)\n",
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
nfs_start_io_read(inode);
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
if (result > 0)
nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
}
nfs_end_io_read(inode);
return result;
}
EXPORT_SYMBOL_GPL(nfs_file_read);
int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
struct inode *inode = file_inode(file);
int status;
dprintk("NFS: mmap(%pD2)\n", file);
/* Note: generic_file_mmap() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
*/
status = generic_file_mmap(file, vma);
if (!status) {
vma->vm_ops = &nfs_file_vm_ops;
status = nfs_revalidate_mapping(inode, file->f_mapping);
}
return status;
}
EXPORT_SYMBOL_GPL(nfs_file_mmap);
/*
* Flush any dirty pages for this process, and check for write errors.
* The return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
struct inode *inode = file_inode(file);
int ret;
dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
ret = nfs_commit_inode(inode, FLUSH_SYNC);
if (ret < 0)
return ret;
return file_check_and_advance_wb_err(file);
}
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
int ret;
trace_nfs_fsync_enter(inode);
for (;;) {
ret = file_write_and_wait_range(file, start, end);
if (ret != 0)
break;
ret = nfs_file_fsync_commit(file, datasync);
if (ret != 0)
break;
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
break;
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
* the NFS_CONTEXT_RESEND_WRITES flag
*/
start = 0;
end = LLONG_MAX;
}
trace_nfs_fsync_exit(inode, ret);
return ret;
}
EXPORT_SYMBOL_GPL(nfs_file_fsync);
/*
* Decide whether a read/modify/write cycle may be more efficient
* then a modify/write/read cycle when writing to a page in the
* page cache.
*
* Some pNFS layout drivers can only read/write at a certain block
* granularity like all block devices and therefore we must perform
* read/modify/write whenever a page hasn't read yet and the data
* to be written there is not aligned to a block boundary and/or
* smaller than the block size.
*
* The modify/write/read cycle may occur if a page is read before
* being completely filled by the writer. In this situation, the
* page must be completely written to stable storage on the server
* before it can be refilled by reading in the page from the server.
* This can lead to expensive, small, FILE_SYNC mode writes being
* done.
*
* It may be more efficient to read the page first if the file is
* open for reading in addition to writing, the page is not marked
* as Uptodate, it is not dirty or waiting to be committed,
* indicating that it was previously allocated and then modified,
* that there were valid bytes of data in that range of the file,
* and that the new data won't completely replace the old data in
* that range of the file.
*/
static bool nfs_full_page_write(struct page *page, loff_t pos, unsigned int len)
{
unsigned int pglen = nfs_page_length(page);
unsigned int offset = pos & (PAGE_SIZE - 1);
unsigned int end = offset + len;
return !pglen || (end >= pglen && !offset);
}
static bool nfs_want_read_modify_write(struct file *file, struct page *page,
loff_t pos, unsigned int len)
{
/*
* Up-to-date pages, those with ongoing or full-page write
* don't need read/modify/write
*/
if (PageUptodate(page) || PagePrivate(page) ||
nfs_full_page_write(page, pos, len))
return false;
if (pnfs_ld_read_whole_page(file->f_mapping->host))
return true;
/* Open for reading too? */
if (file->f_mode & FMODE_READ)
return true;
return false;
}
/*
* This does the "real" work of the write. We must allocate and lock the
* page to be sent back to the generic routine, which then copies the
* data from user space.
*
* If the writer ends up delaying the write, the writer needs to
* increment the page use counts until he is done with the page.
*/
static int nfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
int ret;
pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int once_thru = 0;
dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
start:
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
ret = nfs_flush_incompatible(file, page);
if (ret) {
unlock_page(page);
put_page(page);
} else if (!once_thru &&
nfs_want_read_modify_write(file, page, pos, len)) {
once_thru = 1;
ret = nfs_readpage(file, page);
put_page(page);
if (!ret)
goto start;
}
return ret;
}
static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
unsigned offset = pos & (PAGE_SIZE - 1);
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
file, mapping->host->i_ino, len, (long long) pos);
/*
* Zero any uninitialised parts of the page, and then mark the page
* as up to date if it turns out that we're extending the file.
*/
if (!PageUptodate(page)) {
unsigned pglen = nfs_page_length(page);
unsigned end = offset + copied;
if (pglen == 0) {
zero_user_segments(page, 0, offset,
end, PAGE_SIZE);
SetPageUptodate(page);
} else if (end >= pglen) {
zero_user_segment(page, end, PAGE_SIZE);
if (offset == 0)
SetPageUptodate(page);
} else
zero_user_segment(page, pglen, PAGE_SIZE);
}
status = nfs_updatepage(file, page, offset, copied);
unlock_page(page);
put_page(page);
if (status < 0)
return status;
NFS_I(mapping->host)->write_io += copied;
if (nfs_ctx_key_to_expire(ctx, mapping->host)) {
status = nfs_wb_all(mapping->host);
if (status < 0)
return status;
}
return copied;
}
/*
* Partially or wholly invalidate a page
* - Release the private state associated with a page if undergoing complete
* page invalidation
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
*/
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
folio->index, offset, length);
if (offset != 0 || length < folio_size(folio))
return;
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(folio->mapping->host, folio);
folio_wait_fscache(folio);
}
/*
* Attempt to release the private state associated with a page
* - Called if either PG_private or PG_fscache is set on the page
* - Caller holds page lock
* - Return true (may release page) or false (may not)
*/
static int nfs_release_page(struct page *page, gfp_t gfp)
{
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
return nfs_fscache_release_page(page, gfp);
}
static void nfs_check_dirty_writeback(struct page *page,
bool *dirty, bool *writeback)
{
struct nfs_inode *nfsi;
struct address_space *mapping = page_file_mapping(page);
if (!mapping || PageSwapCache(page))
return;
/*
* Check if an unstable page is currently being committed and
* if so, have the VM treat it as if the page is under writeback
* so it will not block due to pages that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
if (atomic_read(&nfsi->commit_info.rpcs_out)) {
*writeback = true;
return;
}
/*
* If PagePrivate() is set, then the page is not freeable and as the
* inode is not being committed, it's not going to be cleaned in the
* near future so treat it as dirty
*/
if (PagePrivate(page))
*dirty = true;
}
/*
* Attempt to clear the private state associated with a page when an error
* occurs that requires the cached contents of an inode to be written back or
* destroyed
* - Called if either PG_private or fscache is set on the page
* - Caller holds page lock
* - Return 0 if successful, -error otherwise
*/
static int nfs_launder_folio(struct folio *folio)
{
struct inode *inode = folio->mapping->host;
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
folio_wait_fscache(folio);
return nfs_wb_page(inode, &folio->page);
}
static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
unsigned long blocks;
long long isize;
struct inode *inode = file_inode(file);
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
spin_lock(&inode->i_lock);
blocks = inode->i_blocks;
isize = inode->i_size;
spin_unlock(&inode->i_lock);
if (blocks*512 < isize) {
pr_warn("swap activate: swapfile has holes\n");
return -EINVAL;
}
*span = sis->pages;
if (cl->rpc_ops->enable_swap)
cl->rpc_ops->enable_swap(inode);
return rpc_clnt_swap_activate(clnt);
}
static void nfs_swap_deactivate(struct file *file)
{
struct inode *inode = file_inode(file);
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;
rpc_clnt_swap_deactivate(clnt);
if (cl->rpc_ops->disable_swap)
cl->rpc_ops->disable_swap(file_inode(file));
}
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readahead = nfs_readahead,
.dirty_folio = filemap_dirty_folio,
.writepage = nfs_writepage,
.writepages = nfs_writepages,
.write_begin = nfs_write_begin,
.write_end = nfs_write_end,
.invalidate_folio = nfs_invalidate_folio,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
#ifdef CONFIG_MIGRATION
.migratepage = nfs_migrate_page,
#endif
.launder_folio = nfs_launder_folio,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
.swap_activate = nfs_swap_activate,
.swap_deactivate = nfs_swap_deactivate,
};
/*
* Notification that a PTE pointing to an NFS page is about to be made
* writable, implying that someone is about to modify the page through a
* shared-writable mapping
*/
static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
struct page *page = vmf->page;
struct file *filp = vmf->vma->vm_file;
struct inode *inode = file_inode(filp);
unsigned pagelen;
vm_fault_t ret = VM_FAULT_NOPAGE;
struct address_space *mapping;
dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
filp, filp->f_mapping->host->i_ino,
(long long)page_offset(page));
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
if (PageFsCache(page) &&
wait_on_page_fscache_killable(vmf->page) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
nfs_wait_bit_killable, TASK_KILLABLE);
lock_page(page);
mapping = page_file_mapping(page);
if (mapping != inode->i_mapping)
goto out_unlock;
wait_on_page_writeback(page);
pagelen = nfs_page_length(page);
if (pagelen == 0)
goto out_unlock;
ret = VM_FAULT_LOCKED;
if (nfs_flush_incompatible(filp, page) == 0 &&
nfs_updatepage(filp, page, 0, pagelen) == 0)
goto out;
ret = VM_FAULT_SIGBUS;
out_unlock:
unlock_page(page);
out:
sb_end_pagefault(inode->i_sb);
return ret;
}
static const struct vm_operations_struct nfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = nfs_vm_page_mkwrite,
};
static int nfs_need_check_write(struct file *filp, struct inode *inode,
int error)
{
struct nfs_open_context *ctx;
ctx = nfs_file_open_context(filp);
if (nfs_error_is_fatal_on_server(error) ||
nfs_ctx_key_to_expire(ctx, inode))
return 1;
return 0;
}
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
unsigned int mntflags = NFS_SERVER(inode)->flags;
ssize_t result, written;
errseq_t since;
int error;
result = nfs_key_timeout_notify(file, inode);
if (result)
return result;
if (iocb->ki_flags & IOCB_DIRECT)
return nfs_file_direct_write(iocb, from, false);
dprintk("NFS: write(%pD2, %zu@%Ld)\n",
file, iov_iter_count(from), (long long) iocb->ki_pos);
if (IS_SWAPFILE(inode))
goto out_swapfile;
/*
* O_APPEND implies that we must revalidate the file length.
*/
if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
}
nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
nfs_start_io_write(inode);
result = generic_write_checks(iocb, from);
if (result > 0) {
current->backing_dev_info = inode_to_bdi(inode);
result = generic_perform_write(file, from, iocb->ki_pos);
current->backing_dev_info = NULL;
}
nfs_end_io_write(inode);
if (result <= 0)
goto out;
written = result;
iocb->ki_pos += written;
if (mntflags & NFS_MOUNT_WRITE_EAGER) {
result = filemap_fdatawrite_range(file->f_mapping,
iocb->ki_pos - written,
iocb->ki_pos - 1);
if (result < 0)
goto out;
}
if (mntflags & NFS_MOUNT_WRITE_WAIT) {
result = filemap_fdatawait_range(file->f_mapping,
iocb->ki_pos - written,
iocb->ki_pos - 1);
if (result < 0)
goto out;
}
result = generic_write_sync(iocb, written);
if (result < 0)
goto out;
/* Return error values */
error = filemap_check_wb_err(file->f_mapping, since);
if (nfs_need_check_write(file, inode, error)) {
int err = nfs_wb_all(inode);
if (err < 0)
result = err;
}
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
out:
return result;
out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status = 0;
unsigned int saved_type = fl->fl_type;
/* Try local locking first */
posix_test_lock(filp, fl);
if (fl->fl_type != F_UNLCK) {
/* found a conflict */
goto out;
}
fl->fl_type = saved_type;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
goto out_noconflict;
if (is_local)
goto out_noconflict;
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
return status;
out_noconflict:
fl->fl_type = F_UNLCK;
goto out;
}
static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
struct nfs_lock_context *l_ctx;
int status;
/*
* Flush all pending writes before doing anything
* with locks..
*/
nfs_wb_all(inode);
l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
if (!IS_ERR(l_ctx)) {
status = nfs_iocounter_wait(l_ctx);
nfs_put_lock_context(l_ctx);
/* NOTE: special case
* If we're signalled while cleaning up locks on process exit, we
* still need to complete the unlock.
*/
if (status < 0 && !(fl->fl_flags & FL_CLOSE))
return status;
}
/*
* Use local locking if mounted with "-onolock" or with appropriate
* "-olocal_lock="
*/
if (!is_local)
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = locks_lock_file_wait(filp, fl);
return status;
}
static int
do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
struct inode *inode = filp->f_mapping->host;
int status;
/*
* Flush all pending writes before doing anything
* with locks..
*/
status = nfs_sync_mapping(filp->f_mapping);
if (status != 0)
goto out;
/*
* Use local locking if mounted with "-onolock" or with appropriate
* "-olocal_lock="
*/
if (!is_local)
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = locks_lock_file_wait(filp, fl);
if (status < 0)
goto out;
/*
* Invalidate cache to prevent missing any changes. If
* the file is mapped, clear the page cache as well so
* those mappings will be loaded.
*
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
nfs_zap_caches(inode);
if (mapping_mapped(filp->f_mapping))
nfs_revalidate_mapping(inode, filp->f_mapping);
}
out:
return status;
}
/*
* Lock a (portion of) a file
*/
int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int ret = -ENOLCK;
int is_local = 0;
dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
filp, fl->fl_type, fl->fl_flags,
(long long)fl->fl_start, (long long)fl->fl_end);
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
if (fl->fl_flags & FL_RECLAIM)
return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
ret = NFS_PROTO(inode)->lock_check_bounds(fl);
if (ret < 0)
goto out_err;
}
if (IS_GETLK(cmd))
ret = do_getlk(filp, cmd, fl, is_local);
else if (fl->fl_type == F_UNLCK)
ret = do_unlk(filp, cmd, fl, is_local);
else
ret = do_setlk(filp, cmd, fl, is_local);
out_err:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_lock);
/*
* Lock a (portion of) a file
*/
int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int is_local = 0;
dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
filp, fl->fl_type, fl->fl_flags);
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
/* We're simulating flock() locks using posix locks on the server */
if (fl->fl_type == F_UNLCK)
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
.read_iter = nfs_file_read,
.write_iter = nfs_file_write,
.mmap = nfs_file_mmap,
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
.fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);