xfs: skip flushing log items during push

The AIL pushing code spends a huge amount of time skipping over
items that are already marked as flushing. It is not uncommon to
see hundreds of thousands of items skipped every second due to inode
clustering marking all the inodes in a cluster as flushing when the
first one is flushed.

However, to discover an item is already flushing and should be
skipped we have to call the iop_push() method for it to try to flush
the item. For inodes (where this matters most), we first have to
check that the inode is flushable.

We can optimise this overhead away by tracking whether the log item
is flushing internally. This allows xfsaild_push() to check the log
item directly for flushing state and immediately skip the log item.
Whilst this doesn't remove the CPU cache misses for loading the log
item, it does avoid the overhead of an indirect function call
and the cache misses involved in accessing inode and
backing cluster buffer structures to determine flushing state. When
trying to flush hundreds of thousands of inodes each second, this
CPU overhead saving adds up quickly.

It's so noticeable that the biggest issue with pushing on the AIL on
fast storage becomes the 10ms back-off wait when we hit enough
pinned buffers to break out of the push loop but not enough for the
AIL pushing to be considered stuck. This limits the xfsaild to about
70% total CPU usage, and on fast storage this isn't enough to keep
the storage 100% busy.

The xfsaild will block on IO submission on slow storage and so is
self throttling - it does not need a backoff in the case where we
are really just breaking out of the walk to submit the IO we have
gathered.

Further, with no backoff we don't need to gather huge delwri lists to
mitigate the impact of backoffs, so we can submit IO more frequently
and reduce the time log items spend in flushing state by breaking
out of the item push loop once we've gathered enough IO to batch
submission effectively.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
This commit is contained in:
Dave Chinner 2024-06-20 09:21:28 +02:00 committed by Chandan Babu R
parent c1220522ef
commit f3f7ae68a4
4 changed files with 16 additions and 3 deletions

View File

@ -2555,6 +2555,7 @@ flush_out:
iip->ili_last_fields = iip->ili_fields; iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0; iip->ili_fields = 0;
iip->ili_fsync_fields = 0; iip->ili_fsync_fields = 0;
set_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
spin_unlock(&iip->ili_lock); spin_unlock(&iip->ili_lock);
/* /*

View File

@ -965,6 +965,7 @@ xfs_iflush_finish(
} }
iip->ili_last_fields = 0; iip->ili_last_fields = 0;
iip->ili_flush_lsn = 0; iip->ili_flush_lsn = 0;
clear_bit(XFS_LI_FLUSHING, &lip->li_flags);
spin_unlock(&iip->ili_lock); spin_unlock(&iip->ili_lock);
xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING); xfs_iflags_clear(iip->ili_inode, XFS_IFLUSHING);
if (drop_buffer) if (drop_buffer)
@ -1023,8 +1024,10 @@ xfs_buf_inode_io_fail(
{ {
struct xfs_log_item *lip; struct xfs_log_item *lip;
list_for_each_entry(lip, &bp->b_li_list, li_bio_list) list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
set_bit(XFS_LI_FAILED, &lip->li_flags); set_bit(XFS_LI_FAILED, &lip->li_flags);
clear_bit(XFS_LI_FLUSHING, &lip->li_flags);
}
} }
/* /*
@ -1043,6 +1046,7 @@ xfs_iflush_abort_clean(
iip->ili_flush_lsn = 0; iip->ili_flush_lsn = 0;
iip->ili_item.li_buf = NULL; iip->ili_item.li_buf = NULL;
list_del_init(&iip->ili_item.li_bio_list); list_del_init(&iip->ili_item.li_bio_list);
clear_bit(XFS_LI_FLUSHING, &iip->ili_item.li_flags);
} }
/* /*

View File

@ -58,13 +58,15 @@ struct xfs_log_item {
#define XFS_LI_FAILED 2 #define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3 #define XFS_LI_DIRTY 3
#define XFS_LI_WHITEOUT 4 #define XFS_LI_WHITEOUT 4
#define XFS_LI_FLUSHING 5
#define XFS_LI_FLAGS \ #define XFS_LI_FLAGS \
{ (1u << XFS_LI_IN_AIL), "IN_AIL" }, \ { (1u << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1u << XFS_LI_ABORTED), "ABORTED" }, \ { (1u << XFS_LI_ABORTED), "ABORTED" }, \
{ (1u << XFS_LI_FAILED), "FAILED" }, \ { (1u << XFS_LI_FAILED), "FAILED" }, \
{ (1u << XFS_LI_DIRTY), "DIRTY" }, \ { (1u << XFS_LI_DIRTY), "DIRTY" }, \
{ (1u << XFS_LI_WHITEOUT), "WHITEOUT" } { (1u << XFS_LI_WHITEOUT), "WHITEOUT" }, \
{ (1u << XFS_LI_FLUSHING), "FLUSHING" }
struct xfs_item_ops { struct xfs_item_ops {
unsigned flags; unsigned flags;

View File

@ -512,6 +512,9 @@ xfsaild_push(
while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) { while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) {
int lock_result; int lock_result;
if (test_bit(XFS_LI_FLUSHING, &lip->li_flags))
goto next_item;
/* /*
* Note that iop_push may unlock and reacquire the AIL lock. We * Note that iop_push may unlock and reacquire the AIL lock. We
* rely on the AIL cursor implementation to be able to deal with * rely on the AIL cursor implementation to be able to deal with
@ -581,9 +584,12 @@ xfsaild_push(
if (stuck > 100) if (stuck > 100)
break; break;
next_item:
lip = xfs_trans_ail_cursor_next(ailp, &cur); lip = xfs_trans_ail_cursor_next(ailp, &cur);
if (lip == NULL) if (lip == NULL)
break; break;
if (lip->li_lsn != lsn && count > 1000)
break;
lsn = lip->li_lsn; lsn = lip->li_lsn;
} }
@ -620,7 +626,7 @@ out_done:
/* /*
* Assume we have more work to do in a short while. * Assume we have more work to do in a short while.
*/ */
tout = 10; tout = 0;
} }
return tout; return tout;