linux/fs/xfs/linux-2.6/xfs_file.c
Christoph Hellwig e89bc612d6 [XFS] revert to double-buffering readdir
The current readdir implementation deadlocks on a btree buffers locks
because nfsd calls back into ->lookup from the filldir callback. The only
short-term fix for this is to revert to the old inefficient
double-buffering scheme.

SGI-PV: 973377
SGI-Modid: xfs-linux-melb:xfs-kern:30201a

Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
2007-12-10 13:47:15 +11:00

560 lines
13 KiB
C

/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_trans.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_ioctl32.h"
#include "xfs_vnodeops.h"
#include <linux/dcache.h>
#include <linux/smp_lock.h>
static struct vm_operations_struct xfs_file_vm_ops;
#ifdef CONFIG_XFS_DMAPI
static struct vm_operations_struct xfs_dmapi_file_vm_ops;
#endif
STATIC_INLINE ssize_t
__xfs_file_read(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
int ioflags,
loff_t pos)
{
struct file *file = iocb->ki_filp;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov,
nr_segs, &iocb->ki_pos, ioflags);
}
STATIC ssize_t
xfs_file_aio_read(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
{
return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos);
}
STATIC ssize_t
xfs_file_aio_read_invis(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
{
return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}
STATIC_INLINE ssize_t
__xfs_file_write(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
int ioflags,
loff_t pos)
{
struct file *file = iocb->ki_filp;
BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs,
&iocb->ki_pos, ioflags);
}
STATIC ssize_t
xfs_file_aio_write(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
{
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos);
}
STATIC ssize_t
xfs_file_aio_write_invis(
struct kiocb *iocb,
const struct iovec *iov,
unsigned long nr_segs,
loff_t pos)
{
return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}
STATIC ssize_t
xfs_file_splice_read(
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len,
unsigned int flags)
{
return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
infilp, ppos, pipe, len, flags, 0);
}
STATIC ssize_t
xfs_file_splice_read_invis(
struct file *infilp,
loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len,
unsigned int flags)
{
return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode),
infilp, ppos, pipe, len, flags, IO_INVIS);
}
STATIC ssize_t
xfs_file_splice_write(
struct pipe_inode_info *pipe,
struct file *outfilp,
loff_t *ppos,
size_t len,
unsigned int flags)
{
return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
pipe, outfilp, ppos, len, flags, 0);
}
STATIC ssize_t
xfs_file_splice_write_invis(
struct pipe_inode_info *pipe,
struct file *outfilp,
loff_t *ppos,
size_t len,
unsigned int flags)
{
return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode),
pipe, outfilp, ppos, len, flags, IO_INVIS);
}
STATIC int
xfs_file_open(
struct inode *inode,
struct file *filp)
{
if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
return -EFBIG;
return -xfs_open(XFS_I(inode));
}
STATIC int
xfs_file_release(
struct inode *inode,
struct file *filp)
{
return -xfs_release(XFS_I(inode));
}
STATIC int
xfs_file_fsync(
struct file *filp,
struct dentry *dentry,
int datasync)
{
int flags = FSYNC_WAIT;
if (datasync)
flags |= FSYNC_DATA;
xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED);
return -xfs_fsync(XFS_I(dentry->d_inode), flags,
(xfs_off_t)0, (xfs_off_t)-1);
}
#ifdef CONFIG_XFS_DMAPI
STATIC int
xfs_vm_fault(
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
bhv_vnode_t *vp = vn_from_inode(inode);
ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
return VM_FAULT_SIGBUS;
return filemap_fault(vma, vmf);
}
#endif /* CONFIG_XFS_DMAPI */
/*
* Unfortunately we can't just use the clean and simple readdir implementation
* below, because nfs might call back into ->lookup from the filldir callback
* and that will deadlock the low-level btree code.
*
* Hopefully we'll find a better workaround that allows to use the optimal
* version at least for local readdirs for 2.6.25.
*/
#if 0
STATIC int
xfs_file_readdir(
struct file *filp,
void *dirent,
filldir_t filldir)
{
struct inode *inode = filp->f_path.dentry->d_inode;
xfs_inode_t *ip = XFS_I(inode);
int error;
size_t bufsize;
/*
* The Linux API doesn't pass down the total size of the buffer
* we read into down to the filesystem. With the filldir concept
* it's not needed for correct information, but the XFS dir2 leaf
* code wants an estimate of the buffer size to calculate it's
* readahead window and size the buffers used for mapping to
* physical blocks.
*
* Try to give it an estimate that's good enough, maybe at some
* point we can change the ->readdir prototype to include the
* buffer size.
*/
bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size);
error = xfs_readdir(ip, dirent, bufsize,
(xfs_off_t *)&filp->f_pos, filldir);
if (error)
return -error;
return 0;
}
#else
struct hack_dirent {
int namlen;
loff_t offset;
u64 ino;
unsigned int d_type;
char name[];
};
struct hack_callback {
char *dirent;
size_t len;
size_t used;
};
STATIC int
xfs_hack_filldir(
void *__buf,
const char *name,
int namlen,
loff_t offset,
u64 ino,
unsigned int d_type)
{
struct hack_callback *buf = __buf;
struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
if (buf->used + sizeof(struct hack_dirent) + namlen > buf->len)
return -EINVAL;
de->namlen = namlen;
de->offset = offset;
de->ino = ino;
de->d_type = d_type;
memcpy(de->name, name, namlen);
buf->used += sizeof(struct hack_dirent) + namlen;
return 0;
}
STATIC int
xfs_file_readdir(
struct file *filp,
void *dirent,
filldir_t filldir)
{
struct inode *inode = filp->f_path.dentry->d_inode;
xfs_inode_t *ip = XFS_I(inode);
struct hack_callback buf;
struct hack_dirent *de;
int error;
loff_t size;
int eof = 0;
xfs_off_t start_offset, curr_offset, offset;
/*
* Try fairly hard to get memory
*/
buf.len = PAGE_CACHE_SIZE;
do {
buf.dirent = kmalloc(buf.len, GFP_KERNEL);
if (buf.dirent)
break;
buf.len >>= 1;
} while (buf.len >= 1024);
if (!buf.dirent)
return -ENOMEM;
curr_offset = filp->f_pos;
if (curr_offset == 0x7fffffff)
offset = 0xffffffff;
else
offset = filp->f_pos;
while (!eof) {
int reclen;
start_offset = offset;
buf.used = 0;
error = -xfs_readdir(ip, &buf, buf.len, &offset,
xfs_hack_filldir);
if (error || offset == start_offset) {
size = 0;
break;
}
size = buf.used;
de = (struct hack_dirent *)buf.dirent;
while (size > 0) {
if (filldir(dirent, de->name, de->namlen,
curr_offset & 0x7fffffff,
de->ino, de->d_type)) {
goto done;
}
reclen = sizeof(struct hack_dirent) + de->namlen;
size -= reclen;
curr_offset = de->offset /* & 0x7fffffff */;
de = (struct hack_dirent *)((char *)de + reclen);
}
}
done:
if (!error) {
if (size == 0)
filp->f_pos = offset & 0x7fffffff;
else if (de)
filp->f_pos = curr_offset;
}
kfree(buf.dirent);
return error;
}
#endif
STATIC int
xfs_file_mmap(
struct file *filp,
struct vm_area_struct *vma)
{
vma->vm_ops = &xfs_file_vm_ops;
vma->vm_flags |= VM_CAN_NONLINEAR;
#ifdef CONFIG_XFS_DMAPI
if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
vma->vm_ops = &xfs_dmapi_file_vm_ops;
#endif /* CONFIG_XFS_DMAPI */
file_accessed(filp);
return 0;
}
STATIC long
xfs_file_ioctl(
struct file *filp,
unsigned int cmd,
unsigned long p)
{
int error;
struct inode *inode = filp->f_path.dentry->d_inode;
error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p);
xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
/* NOTE: some of the ioctl's return positive #'s as a
* byte count indicating success, such as
* readlink_by_handle. So we don't "sign flip"
* like most other routines. This means true
* errors need to be returned as a negative value.
*/
return error;
}
STATIC long
xfs_file_ioctl_invis(
struct file *filp,
unsigned int cmd,
unsigned long p)
{
int error;
struct inode *inode = filp->f_path.dentry->d_inode;
error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p);
xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED);
/* NOTE: some of the ioctl's return positive #'s as a
* byte count indicating success, such as
* readlink_by_handle. So we don't "sign flip"
* like most other routines. This means true
* errors need to be returned as a negative value.
*/
return error;
}
#ifdef CONFIG_XFS_DMAPI
#ifdef HAVE_VMOP_MPROTECT
STATIC int
xfs_vm_mprotect(
struct vm_area_struct *vma,
unsigned int newflags)
{
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct xfs_mount *mp = XFS_M(inode->i_sb);
int error = 0;
if (mp->m_flags & XFS_MOUNT_DMAPI) {
if ((vma->vm_flags & VM_MAYSHARE) &&
(newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
}
return error;
}
#endif /* HAVE_VMOP_MPROTECT */
#endif /* CONFIG_XFS_DMAPI */
#ifdef HAVE_FOP_OPEN_EXEC
/* If the user is attempting to execute a file that is offline then
* we have to trigger a DMAPI READ event before the file is marked as busy
* otherwise the invisible I/O will not be able to write to the file to bring
* it back online.
*/
STATIC int
xfs_file_open_exec(
struct inode *inode)
{
struct xfs_mount *mp = XFS_M(inode->i_sb);
if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI)) {
if (DM_EVENT_ENABLED(XFS_I(inode), DM_EVENT_READ)) {
bhv_vnode_t *vp = vn_from_inode(inode);
return -XFS_SEND_DATA(mp, DM_EVENT_READ,
vp, 0, 0, 0, NULL);
}
}
return 0;
}
#endif /* HAVE_FOP_OPEN_EXEC */
/*
* mmap()d file has taken write protection fault and is being made
* writable. We can set the page state up correctly for a writable
* page, which means we can do correct delalloc accounting (ENOSPC
* checking!) and unwritten extent mapping.
*/
STATIC int
xfs_vm_page_mkwrite(
struct vm_area_struct *vma,
struct page *page)
{
return block_page_mkwrite(vma, page, xfs_get_blocks);
}
const struct file_operations xfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = xfs_file_aio_read,
.aio_write = xfs_file_aio_write,
.splice_read = xfs_file_splice_read,
.splice_write = xfs_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.mmap = xfs_file_mmap,
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
#ifdef HAVE_FOP_OPEN_EXEC
.open_exec = xfs_file_open_exec,
#endif
};
const struct file_operations xfs_invis_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = xfs_file_aio_read_invis,
.aio_write = xfs_file_aio_write_invis,
.splice_read = xfs_file_splice_read_invis,
.splice_write = xfs_file_splice_write_invis,
.unlocked_ioctl = xfs_file_ioctl_invis,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_invis_ioctl,
#endif
.mmap = xfs_file_mmap,
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
};
const struct file_operations xfs_dir_file_operations = {
.read = generic_read_dir,
.readdir = xfs_file_readdir,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
#endif
.fsync = xfs_file_fsync,
};
static struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
.page_mkwrite = xfs_vm_page_mkwrite,
};
#ifdef CONFIG_XFS_DMAPI
static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
.fault = xfs_vm_fault,
.page_mkwrite = xfs_vm_page_mkwrite,
#ifdef HAVE_VMOP_MPROTECT
.mprotect = xfs_vm_mprotect,
#endif
};
#endif /* CONFIG_XFS_DMAPI */