Merge branch 'master' into for-linus

This commit is contained in:
Felix Blyakher 2009-04-09 14:12:07 -05:00
commit dc2a5536d6
13 changed files with 180 additions and 161 deletions

View File

@ -152,23 +152,6 @@ xfs_find_bdev_for_inode(
return mp->m_ddev_targp->bt_bdev;
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
queue_work(xfsdatad_workqueue, &ioend->io_work);
if (wait)
flush_workqueue(xfsdatad_workqueue);
}
}
/*
* We're now finished for good with this ioend structure.
* Update the page state via the associated buffer_heads,
@ -309,6 +292,27 @@ xfs_end_bio_read(
xfs_destroy_ioend(ioend);
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq = xfsdatad_workqueue;
if (ioend->io_work.func == xfs_end_bio_unwritten)
wq = xfsconvertd_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
}
}
/*
* Allocate and initialise an IO completion structure.
* We need to track unwritten extent write completion here initially.

View File

@ -19,6 +19,7 @@
#define __XFS_AOPS_H__
extern struct workqueue_struct *xfsdatad_workqueue;
extern struct workqueue_struct *xfsconvertd_workqueue;
extern mempool_t *xfs_ioend_pool;
/*

View File

@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
struct workqueue_struct *xfsconvertd_workqueue;
#ifdef XFS_BUF_TRACE
void
@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
xfs_buf_t *bp, *n;
int pincount = 0;
xfs_buf_runall_queues(xfsconvertd_workqueue);
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
@ -1831,9 +1833,15 @@ xfs_buf_init(void)
if (!xfsdatad_workqueue)
goto out_destroy_xfslogd_workqueue;
xfsconvertd_workqueue = create_workqueue("xfsconvertd");
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
register_shrinker(&xfs_buf_shake);
return 0;
out_destroy_xfsdatad_workqueue:
destroy_workqueue(xfsdatad_workqueue);
out_destroy_xfslogd_workqueue:
destroy_workqueue(xfslogd_workqueue);
out_free_buf_zone:
@ -1849,6 +1857,7 @@ void
xfs_buf_terminate(void)
{
unregister_shrinker(&xfs_buf_shake);
destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);
kmem_zone_destroy(xfs_buf_zone);

View File

@ -74,14 +74,14 @@ xfs_flush_pages(
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
xfs_iflags_clear(ip, XFS_ITRUNCATED);
ret = filemap_fdatawrite(mapping);
ret = -filemap_fdatawrite(mapping);
}
if (flags & XFS_B_ASYNC)
return -ret;
ret2 = filemap_fdatawait(mapping);
return ret;
ret2 = xfs_wait_on_pages(ip, first, last);
if (!ret)
ret = ret2;
}
return -ret;
return ret;
}
int

View File

@ -751,10 +751,26 @@ start:
goto relock;
}
} else {
int enospc = 0;
ssize_t ret2 = 0;
write_retry:
xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
*offset, ioflags);
ret = generic_file_buffered_write(iocb, iovp, segs,
ret2 = generic_file_buffered_write(iocb, iovp, segs,
pos, offset, count, ret);
/*
* if we just got an ENOSPC, flush the inode now we
* aren't holding any page locks and retry *once*
*/
if (ret2 == -ENOSPC && !enospc) {
error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
if (error)
goto out_unlock_internal;
enospc = 1;
goto write_retry;
}
ret = ret2;
}
current->backing_dev_info = NULL;

View File

@ -62,12 +62,6 @@ xfs_sync_inodes_ag(
uint32_t first_index = 0;
int error = 0;
int last_error = 0;
int fflag = XFS_B_ASYNC;
if (flags & SYNC_DELWRI)
fflag = XFS_B_DELWRI;
if (flags & SYNC_WAIT)
fflag = 0; /* synchronous overrides all */
do {
struct inode *inode;
@ -128,11 +122,23 @@ xfs_sync_inodes_ag(
* If we have to flush data or wait for I/O completion
* we need to hold the iolock.
*/
if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) {
if (flags & SYNC_DELWRI) {
if (VN_DIRTY(inode)) {
if (flags & SYNC_TRYLOCK) {
if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
lock_flags |= XFS_IOLOCK_SHARED;
} else {
xfs_ilock(ip, XFS_IOLOCK_SHARED);
lock_flags |= XFS_IOLOCK_SHARED;
error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE);
if (flags & SYNC_IOWAIT)
}
if (lock_flags & XFS_IOLOCK_SHARED) {
error = xfs_flush_pages(ip, 0, -1,
(flags & SYNC_WAIT) ? 0
: XFS_B_ASYNC,
FI_NONE);
}
}
if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
xfs_ioend_wait(ip);
}
xfs_ilock(ip, XFS_ILOCK_SHARED);
@ -398,15 +404,17 @@ STATIC void
xfs_syncd_queue_work(
struct xfs_mount *mp,
void *data,
void (*syncer)(struct xfs_mount *, void *))
void (*syncer)(struct xfs_mount *, void *),
struct completion *completion)
{
struct bhv_vfs_sync_work *work;
struct xfs_sync_work *work;
work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP);
work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
INIT_LIST_HEAD(&work->w_list);
work->w_syncer = syncer;
work->w_data = data;
work->w_mount = mp;
work->w_completion = completion;
spin_lock(&mp->m_sync_lock);
list_add_tail(&work->w_list, &mp->m_sync_list);
spin_unlock(&mp->m_sync_lock);
@ -420,49 +428,26 @@ xfs_syncd_queue_work(
* heads, looking about for more room...
*/
STATIC void
xfs_flush_inode_work(
xfs_flush_inodes_work(
struct xfs_mount *mp,
void *arg)
{
struct inode *inode = arg;
filemap_flush(inode->i_mapping);
xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
iput(inode);
}
void
xfs_flush_inode(
xfs_flush_inodes(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
DECLARE_COMPLETION_ONSTACK(completion);
igrab(inode);
xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
delay(msecs_to_jiffies(500));
}
/*
* This is the "bigger hammer" version of xfs_flush_inode_work...
* (IOW, "If at first you don't succeed, use a Bigger Hammer").
*/
STATIC void
xfs_flush_device_work(
struct xfs_mount *mp,
void *arg)
{
struct inode *inode = arg;
sync_blockdev(mp->m_super->s_bdev);
iput(inode);
}
void
xfs_flush_device(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
igrab(inode);
xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work);
delay(msecs_to_jiffies(500));
xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
wait_for_completion(&completion);
xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
}
@ -497,7 +482,7 @@ xfssyncd(
{
struct xfs_mount *mp = arg;
long timeleft;
bhv_vfs_sync_work_t *work, *n;
xfs_sync_work_t *work, *n;
LIST_HEAD (tmp);
set_freezable();
@ -532,6 +517,8 @@ xfssyncd(
list_del(&work->w_list);
if (work == &mp->m_sync_work)
continue;
if (work->w_completion)
complete(work->w_completion);
kmem_free(work);
}
}
@ -545,6 +532,7 @@ xfs_syncd_init(
{
mp->m_sync_work.w_syncer = xfs_sync_worker;
mp->m_sync_work.w_mount = mp;
mp->m_sync_work.w_completion = NULL;
mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
if (IS_ERR(mp->m_sync_task))
return -PTR_ERR(mp->m_sync_task);

View File

@ -21,18 +21,20 @@
struct xfs_mount;
struct xfs_perag;
typedef struct bhv_vfs_sync_work {
typedef struct xfs_sync_work {
struct list_head w_list;
struct xfs_mount *w_mount;
void *w_data; /* syncer routine argument */
void (*w_syncer)(struct xfs_mount *, void *);
} bhv_vfs_sync_work_t;
struct completion *w_completion;
} xfs_sync_work_t;
#define SYNC_ATTR 0x0001 /* sync attributes */
#define SYNC_DELWRI 0x0002 /* look at delayed writes */
#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
int xfs_quiesce_data(struct xfs_mount *mp);
void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inode(struct xfs_inode *ip);
void xfs_flush_device(struct xfs_inode *ip);
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);

View File

@ -69,15 +69,6 @@ xfs_inode_alloc(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
/*
* initialise the VFS inode here to get failures
* out of the way early.
*/
if (!inode_init_always(mp->m_super, VFS_I(ip))) {
kmem_zone_free(xfs_inode_zone, ip);
return NULL;
}
/* initialise the xfs inode */
ip->i_ino = ino;
ip->i_mount = mp;
@ -113,6 +104,20 @@ xfs_inode_alloc(
#ifdef XFS_DIR2_TRACE
ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
#endif
/*
* Now initialise the VFS inode. We do this after the xfs_inode
* initialisation as internal failures will result in ->destroy_inode
* being called and that will pass down through the reclaim path and
* free the XFS inode. This path requires the XFS inode to already be
* initialised. Hence if this call fails, the xfs_inode has already
* been freed and we should not reference it at all in the error
* handling.
*/
if (!inode_init_always(mp->m_super, VFS_I(ip)))
return NULL;
/* prevent anyone from using this yet */
VFS_I(ip)->i_state = I_NEW|I_LOCK;
return ip;
}

View File

@ -337,38 +337,6 @@ xfs_iomap_eof_align_last_fsb(
return 0;
}
STATIC int
xfs_flush_space(
xfs_inode_t *ip,
int *fsynced,
int *ioflags)
{
switch (*fsynced) {
case 0:
if (ip->i_delayed_blks) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_flush_inode(ip);
xfs_ilock(ip, XFS_ILOCK_EXCL);
*fsynced = 1;
} else {
*ioflags |= BMAPI_SYNC;
*fsynced = 2;
}
return 0;
case 1:
*fsynced = 2;
*ioflags |= BMAPI_SYNC;
return 0;
case 2:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_flush_device(ip);
xfs_ilock(ip, XFS_ILOCK_EXCL);
*fsynced = 3;
return 0;
}
return 1;
}
STATIC int
xfs_cmn_err_fsblock_zero(
xfs_inode_t *ip,
@ -538,15 +506,9 @@ error_out:
}
/*
* If the caller is doing a write at the end of the file,
* then extend the allocation out to the file system's write
* iosize. We clean up any extra space left over when the
* file is closed in xfs_inactive().
*
* For sync writes, we are flushing delayed allocate space to
* try to make additional space available for allocation near
* the filesystem full boundary - preallocation hurts in that
* situation, of course.
* If the caller is doing a write at the end of the file, then extend the
* allocation out to the file system's write iosize. We clean up any extra
* space left over when the file is closed in xfs_inactive().
*/
STATIC int
xfs_iomap_eof_want_preallocate(
@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate(
int n, error, imaps;
*prealloc = 0;
if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size)
if ((offset + count) <= ip->i_size)
return 0;
/*
@ -611,7 +573,7 @@ xfs_iomap_write_delay(
xfs_extlen_t extsz;
int nimaps;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
int prealloc, fsynced = 0;
int prealloc, flushed = 0;
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@ -627,12 +589,12 @@ xfs_iomap_write_delay(
extsz = xfs_get_extsz_hint(ip);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
retry:
error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
if (error)
return error;
retry:
if (prealloc) {
aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@ -659,15 +621,22 @@ retry:
/*
* If bmapi returned us nothing, and if we didn't get back EDQUOT,
* then we must have run out of space - flush delalloc, and retry..
* then we must have run out of space - flush all other inodes with
* delalloc blocks and retry without EOF preallocation.
*/
if (nimaps == 0) {
xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
ip, offset, count);
if (xfs_flush_space(ip, &fsynced, &ioflag))
if (flushed)
return XFS_ERROR(ENOSPC);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_flush_inodes(ip);
xfs_ilock(ip, XFS_ILOCK_EXCL);
flushed = 1;
error = 0;
prealloc = 0;
goto retry;
}

View File

@ -40,8 +40,7 @@ typedef enum {
BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */
BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */
BMAPI_MMAP = (1 << 6), /* allocate for mmap write */
BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */
BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */
BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
} bmapi_flags_t;

View File

@ -562,9 +562,8 @@ xfs_log_mount(
}
mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
if (!mp->m_log) {
cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!");
error = ENOMEM;
if (IS_ERR(mp->m_log)) {
error = -PTR_ERR(mp->m_log);
goto out;
}
@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp,
xfs_buf_t *bp;
int i;
int iclogsize;
int error = ENOMEM;
log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
if (!log)
return NULL;
if (!log) {
xlog_warn("XFS: Log allocation failed: No memory!");
goto out;
}
log->l_mp = mp;
log->l_targ = log_target;
@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_grant_reserve_cycle = 1;
log->l_grant_write_cycle = 1;
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
if (log->l_sectbb_log < 0 ||
log->l_sectbb_log > mp->m_sectbb_log) {
xlog_warn("XFS: Log sector size (0x%x) out of range.",
log->l_sectbb_log);
goto out_free_log;
}
/* for larger sector sizes, must have v2 or external log */
ASSERT(log->l_sectbb_log == 0 ||
log->l_logBBstart == 0 ||
xfs_sb_version_haslogv2(&mp->m_sb));
ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
if (log->l_sectbb_log != 0 &&
(log->l_logBBstart != 0 &&
!xfs_sb_version_haslogv2(&mp->m_sb))) {
xlog_warn("XFS: log sector size (0x%x) invalid "
"for configuration.", log->l_sectbb_log);
goto out_free_log;
}
if (mp->m_sb.sb_logsectlog < BBSHIFT) {
xlog_warn("XFS: Log sector log (0x%x) too small.",
mp->m_sb.sb_logsectlog);
goto out_free_log;
}
}
log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
xlog_get_iclog_buffer_size(mp, log);
error = ENOMEM;
bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
if (!bp)
goto out_free_log;
@ -1313,7 +1331,8 @@ out_free_iclog:
xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
return NULL;
out:
return ERR_PTR(-error);
} /* xlog_alloc_log */
@ -2541,18 +2560,19 @@ redo:
xlog_ins_ticketq(&log->l_reserve_headq, tic);
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
}
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: wake 2");
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
xlog_del_ticketq(&log->l_reserve_headq, tic);
@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log,
* for more free space, otherwise try to get some space for
* this transaction.
*/
need_bytes = tic->t_unit_res;
if ((ntic = log->l_write_headq)) {
free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
log->l_grant_write_bytes);
@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
* off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
}
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: wake 1");
xlog_grant_push_ail(log->l_mp, tic->t_unit_res);
spin_lock(&log->l_grant_lock);
}
}
need_bytes = tic->t_unit_res;
redo:
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
@ -2680,19 +2699,20 @@ redo:
if (free_bytes < need_bytes) {
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
spin_unlock(&log->l_grant_lock);
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
XFS_STATS_INC(xs_sleep_logspace);
sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto error_return;
}
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: wake 2");
xlog_grant_push_ail(log->l_mp, need_bytes);
spin_lock(&log->l_grant_lock);
goto redo;
} else if (tic->t_flags & XLOG_TIC_IN_Q)
xlog_del_ticketq(&log->l_write_headq, tic);

View File

@ -313,7 +313,7 @@ typedef struct xfs_mount {
#endif
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct task_struct *m_sync_task; /* generalised sync thread */
bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
struct list_head m_sync_list; /* sync thread work item list */
spinlock_t m_sync_lock; /* work item list lock */
int m_sync_seq; /* sync thread generation no. */

View File

@ -1457,6 +1457,13 @@ xfs_create(
error = xfs_trans_reserve(tp, resblks, log_res, 0,
XFS_TRANS_PERM_LOG_RES, log_count);
if (error == ENOSPC) {
/* flush outstanding delalloc blocks and retry */
xfs_flush_inodes(dp);
error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
}
if (error == ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
error = xfs_trans_reserve(tp, 0, log_res, 0,
XFS_TRANS_PERM_LOG_RES, log_count);