forked from Minki/linux
02c24a8218
Btrfs needs to be able to control how filemap_write_and_wait_range() is called in fsync to make it less of a painful operation, so push down taking i_mutex and the calling of filemap_write_and_wait() down into the ->fsync() handlers. Some file systems can drop taking the i_mutex altogether it seems, like ext3 and ocfs2. For correctness sake I just pushed everything down in all cases to make sure that we keep the current behavior the same for everybody, and then each individual fs maintainer can make up their mind about what to do from there. Thanks, Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
110 lines
3.3 KiB
C
110 lines
3.3 KiB
C
/*
|
|
* linux/fs/ext3/fsync.c
|
|
*
|
|
* Copyright (C) 1993 Stephen Tweedie (sct@redhat.com)
|
|
* from
|
|
* Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
|
|
* Laboratoire MASI - Institut Blaise Pascal
|
|
* Universite Pierre et Marie Curie (Paris VI)
|
|
* from
|
|
* linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
|
|
*
|
|
* ext3fs fsync primitive
|
|
*
|
|
* Big-endian to little-endian byte-swapping/bitmaps by
|
|
* David S. Miller (davem@caip.rutgers.edu), 1995
|
|
*
|
|
* Removed unnecessary code duplication for little endian machines
|
|
* and excessive __inline__s.
|
|
* Andi Kleen, 1997
|
|
*
|
|
* Major simplifications and cleanup - we only need to do the metadata, because
|
|
* we can depend on generic_block_fdatasync() to sync the data blocks.
|
|
*/
|
|
|
|
#include <linux/time.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/writeback.h>
|
|
#include <linux/jbd.h>
|
|
#include <linux/ext3_fs.h>
|
|
#include <linux/ext3_jbd.h>
|
|
|
|
/*
|
|
* akpm: A new design for ext3_sync_file().
|
|
*
|
|
* This is only called from sys_fsync(), sys_fdatasync() and sys_msync().
|
|
* There cannot be a transaction open by this task.
|
|
* Another task could have dirtied this inode. Its data can be in any
|
|
* state in the journalling system.
|
|
*
|
|
* What we do is just kick off a commit and wait on it. This will snapshot the
|
|
* inode to disk.
|
|
*/
|
|
|
|
int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
|
{
|
|
struct inode *inode = file->f_mapping->host;
|
|
struct ext3_inode_info *ei = EXT3_I(inode);
|
|
journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
|
|
int ret, needs_barrier = 0;
|
|
tid_t commit_tid;
|
|
|
|
if (inode->i_sb->s_flags & MS_RDONLY)
|
|
return 0;
|
|
|
|
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/*
|
|
* Taking the mutex here just to keep consistent with how fsync was
|
|
* called previously, however it looks like we don't need to take
|
|
* i_mutex at all.
|
|
*/
|
|
mutex_lock(&inode->i_mutex);
|
|
|
|
J_ASSERT(ext3_journal_current_handle() == NULL);
|
|
|
|
/*
|
|
* data=writeback,ordered:
|
|
* The caller's filemap_fdatawrite()/wait will sync the data.
|
|
* Metadata is in the journal, we wait for a proper transaction
|
|
* to commit here.
|
|
*
|
|
* data=journal:
|
|
* filemap_fdatawrite won't do anything (the buffers are clean).
|
|
* ext3_force_commit will write the file data into the journal and
|
|
* will wait on that.
|
|
* filemap_fdatawait() will encounter a ton of newly-dirtied pages
|
|
* (they were dirtied by commit). But that's OK - the blocks are
|
|
* safe in-journal, which is all fsync() needs to ensure.
|
|
*/
|
|
if (ext3_should_journal_data(inode)) {
|
|
mutex_unlock(&inode->i_mutex);
|
|
return ext3_force_commit(inode->i_sb);
|
|
}
|
|
|
|
if (datasync)
|
|
commit_tid = atomic_read(&ei->i_datasync_tid);
|
|
else
|
|
commit_tid = atomic_read(&ei->i_sync_tid);
|
|
|
|
if (test_opt(inode->i_sb, BARRIER) &&
|
|
!journal_trans_will_send_data_barrier(journal, commit_tid))
|
|
needs_barrier = 1;
|
|
log_start_commit(journal, commit_tid);
|
|
ret = log_wait_commit(journal, commit_tid);
|
|
|
|
/*
|
|
* In case we didn't commit a transaction, we have to flush
|
|
* disk caches manually so that data really is on persistent
|
|
* storage
|
|
*/
|
|
if (needs_barrier)
|
|
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
|
mutex_unlock(&inode->i_mutex);
|
|
return ret;
|
|
}
|