xfs: reduce the rate of cond_resched calls inside scrub

We really don't want to call cond_resched every single time we go
through a loop in scrub -- there may be billions of records, and probing
into the scheduler itself has overhead.  Reduce this overhead by only
calling cond_resched 10x per second; and add a counter so that we only
check jiffies once every 100 records or so.

Surprisingly, this reduces scrub-only fstests runtime by about 2%.  I
used the bmapinflate xfs_db command to produce a billion-extent file and
this stupid gadget reduced the scrub runtime by about 4%.

From a stupid microbenchmark of calling these things 1 billion times, I
estimate that cond_resched costs about 5.5ns per call; jiffies costs
about 0.3ns per read; and fatal_signal_pending costs about 0.4ns per
call.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-04-22 09:48:23 -07:00
parent 3f31406aef
commit 271557de7c
6 changed files with 74 additions and 31 deletions

View File

@ -6,31 +6,6 @@
#ifndef __XFS_SCRUB_COMMON_H__
#define __XFS_SCRUB_COMMON_H__
/*
 * Decide whether a scrub/repair operation ought to stop early.
 *
 * Yields to the scheduler on every call, then reports whether the current
 * task has a fatal signal pending.  When it does, *error is set to -EINTR
 * unless the caller has already stored a nonzero error there; no other
 * judgement is made about *error.
 *
 * Returns true if the caller should bail out, false to keep going.
 */
static inline bool
xchk_should_terminate(
	struct xfs_scrub	*sc,
	int			*error)
{
	/*
	 * Without preemption we have to give the scheduler a chance to run
	 * every so often, otherwise the soft lockup watchdog or the RCU
	 * stall detector will complain.
	 */
	cond_resched();

	if (!fatal_signal_pending(current))
		return false;

	/* Do not clobber an error that was recorded earlier. */
	if (!*error)
		*error = -EINTR;
	return true;
}
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
int xchk_trans_alloc_empty(struct xfs_scrub *sc);
void xchk_trans_cancel(struct xfs_scrub *sc);

View File

@ -620,6 +620,7 @@ xfs_scrub_metadata(
sc->sm = sm;
sc->ops = &meta_scrub_ops[sm->sm_type];
sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
sc->relax = INIT_XCHK_RELAX;
retry_op:
/*
* When repairs are allowed, prevent freezing or readonly remount while

View File

@ -8,6 +8,49 @@
struct xfs_scrub;
/*
 * Rate-limiting state consulted by xchk_maybe_relax() to decide when to yield
 * to the scheduler and whether to look for fatal signals.
 */
struct xchk_relax {
/* jiffies value from which xchk_maybe_relax() will call cond_resched() again */
unsigned long next_resched;
/* calls seen since the last full check; reset to zero once it reaches 100 */
unsigned int resched_nr;
/* if true, a pending fatal signal makes xchk_maybe_relax() return -EINTR */
bool interruptible;
};
/*
 * Yield to the scheduler at most 10x per second.  Note that jiffies is read
 * each time this macro is expanded, so the result is relative to "now".
 */
#define XCHK_RELAX_NEXT (jiffies + (HZ / 10))
/*
 * Initial value for a struct xchk_relax: the first reschedule becomes due
 * HZ / 10 jiffies after the point of expansion, the call counter starts at
 * zero, and fatal signal checks are enabled.
 */
#define INIT_XCHK_RELAX \
(struct xchk_relax){ \
.next_resched = XCHK_RELAX_NEXT, \
.resched_nr = 0, \
.interruptible = true, \
}
/*
 * Relax during a scrub operation and exit if there's a fatal signal pending.
 *
 * If preemption is disabled, we need to yield to the scheduler every now and
 * then so that we don't run afoul of the soft lockup watchdog or RCU stall
 * detector.  cond_resched calls are somewhat expensive (~5ns) so we want to
 * ratelimit this to 10x per second.  Amortize the cost of reading jiffies and
 * checking for fatal signals by only doing so once every 100 calls.
 *
 * Returns -EINTR if @widget is interruptible and the current task has a fatal
 * signal pending at the time of a full check; 0 otherwise.
 */
static inline int xchk_maybe_relax(struct xchk_relax *widget)
{
	/* Amortize the cost of scheduling and checking signals. */
	if (likely(++widget->resched_nr < 100))
		return 0;
	widget->resched_nr = 0;

	/*
	 * Compare against jiffies with the wraparound-safe helper.  A plain
	 * "<=" stops being true if jiffies wraps past next_resched between
	 * two checks, which would suppress cond_resched() until the counter
	 * came all the way around again.
	 */
	if (unlikely(time_after_eq(jiffies, widget->next_resched))) {
		cond_resched();
		widget->next_resched = XCHK_RELAX_NEXT;
	}

	if (widget->interruptible && fatal_signal_pending(current))
		return -EINTR;

	return 0;
}
/*
* Standard flags for allocating memory within scrub. NOFS context is
* configured by the process allocation scope. Scrub and repair must be able
@ -123,6 +166,9 @@ struct xfs_scrub {
*/
unsigned int sick_mask;
/* next time we want to cond_resched() */
struct xchk_relax relax;
/* State tracking for single-AG operations. */
struct xchk_ag sa;
};
@ -167,6 +213,24 @@ struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
unsigned int subtype);
void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
/*
 * Decide whether a scrub/repair operation ought to stop early.
 *
 * Defers to xchk_maybe_relax() on the scrub context's relax state.  If that
 * helper returns nonzero, *error is set to -EINTR unless the caller has
 * already stored a nonzero error there; no other judgement is made about
 * *error.
 *
 * Returns true if the caller should bail out, false to keep going.
 */
static inline bool
xchk_should_terminate(
	struct xfs_scrub	*sc,
	int			*error)
{
	if (!xchk_maybe_relax(&sc->relax))
		return false;

	/* Do not clobber an error that was recorded earlier. */
	if (!*error)
		*error = -EINTR;
	return true;
}
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);

View File

@ -7,9 +7,9 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"
/*
@ -486,6 +486,9 @@ xfarray_sortinfo_alloc(
xfarray_sortinfo_lo(si)[0] = 0;
xfarray_sortinfo_hi(si)[0] = array->nr - 1;
/*
 * INIT_XCHK_RELAX enables fatal signal checks by default.  Only sorts that
 * were requested with XFARRAY_SORT_KILLABLE may be interrupted that way, so
 * turn the checks off for every other sort.
 */
si->relax = INIT_XCHK_RELAX;
if (!(flags & XFARRAY_SORT_KILLABLE))
	si->relax.interruptible = false;
trace_xfarray_sort(si, nr_bytes);
*infop = si;
@ -503,10 +506,7 @@ xfarray_sort_terminated(
* few seconds so that we don't run afoul of the soft lockup watchdog
* or RCU stall detector.
*/
cond_resched();
if ((si->flags & XFARRAY_SORT_KILLABLE) &&
fatal_signal_pending(current)) {
if (xchk_maybe_relax(&si->relax)) {
if (*error == 0)
*error = -EINTR;
return true;

View File

@ -127,6 +127,9 @@ struct xfarray_sortinfo {
/* XFARRAY_SORT_* flags; see below. */
unsigned int flags;
/* next time we want to cond_resched() */
struct xchk_relax relax;
/* Cache a folio here for faster scanning for pivots */
struct folio *folio;

View File

@ -10,9 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/scrub.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>