xfs: deferred scrub of dirents

If the trylock-based parent pointer check fails, retain those dirents
and check them at the end.  This may involve dropping the locks on the
file being scanned, so yay.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-04-22 09:48:02 -07:00
parent 61b3f0df5c
commit b961c8bf1f
4 changed files with 346 additions and 3 deletions

View File

@ -24,6 +24,10 @@
#include "scrub/readdir.h"
#include "scrub/health.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
/* Set us up to scrub directories. */
int
@ -43,12 +47,37 @@ xchk_setup_directory(
/* Directories */
/* Deferred directory entry that we saved for later. */
struct xchk_dirent {
/* Cookie for retrieval of the dirent name. */
xfblob_cookie name_cookie;
/* Child inode number. */
xfs_ino_t ino;
/* Length of the pptr name. */
uint8_t namelen;
};
struct xchk_dir {
struct xfs_scrub *sc;
/* information for parent pointer validation. */
struct xfs_parent_rec pptr_rec;
struct xfs_da_args pptr_args;
/* Fixed-size array of xchk_dirent structures. */
struct xfarray *dir_entries;
/* Blobs containing dirent names. */
struct xfblob *dir_names;
/* If we've cycled the ILOCK, we must revalidate deferred dirents. */
bool need_revalidate;
/* Name buffer for dirent revalidation. */
struct xfs_name xname;
uint8_t namebuf[MAXNAMELEN];
};
/* Scrub a directory entry. */
@ -148,8 +177,26 @@ xchk_dir_check_pptr_fast(
/* Try to lock the inode. */
lockmode = xchk_dir_lock_child(sc, ip);
if (!lockmode) {
xchk_set_incomplete(sc);
return -ECANCELED;
struct xchk_dirent save_de = {
.namelen = name->len,
.ino = ip->i_ino,
};
/* Couldn't lock the inode, so save the dirent for later. */
trace_xchk_dir_defer(sc->ip, name, ip->i_ino);
error = xfblob_storename(sd->dir_names, &save_de.name_cookie,
name);
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
&error))
return error;
error = xfarray_append(sd->dir_entries, &save_de);
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
&error))
return error;
return 0;
}
error = xchk_dir_parent_pointer(sd, name, ip);
@ -865,6 +912,142 @@ out:
return error;
}
/*
* Revalidate a dirent that we collected in the past but couldn't check because
* of lock contention. Returns 0 if the dirent is still valid, -ENOENT if it
* has gone away on us, or a negative errno.
*/
STATIC int
xchk_dir_revalidate_dirent(
struct xchk_dir *sd,
const struct xfs_name *xname,
xfs_ino_t ino)
{
struct xfs_scrub *sc = sd->sc;
xfs_ino_t child_ino;
int error;
/*
* Look up the directory entry. If we get -ENOENT, the directory entry
* went away and there's nothing to revalidate. Return any other
* error.
*/
error = xchk_dir_lookup(sc, sc->ip, xname, &child_ino);
if (error)
return error;
/* The inode number changed, nothing to revalidate. */
if (ino != child_ino)
return -ENOENT;
return 0;
}
/*
* Check a directory entry's parent pointers the slow way, which means we cycle
* locks a bunch and put up with revalidation until we get it done.
*/
STATIC int
xchk_dir_slow_dirent(
struct xchk_dir *sd,
struct xchk_dirent *dirent,
const struct xfs_name *xname)
{
struct xfs_scrub *sc = sd->sc;
struct xfs_inode *ip;
unsigned int lockmode;
int error;
/* Check that the deferred dirent still exists. */
if (sd->need_revalidate) {
error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
if (error == -ENOENT)
return 0;
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0,
&error))
return error;
}
error = xchk_iget(sc, dirent->ino, &ip);
if (error == -EINVAL || error == -ENOENT) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
return 0;
}
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
/*
* If we can grab both IOLOCK and ILOCK of the alleged child, we can
* proceed with the validation.
*/
lockmode = xchk_dir_lock_child(sc, ip);
if (lockmode) {
trace_xchk_dir_slowpath(sc->ip, xname, ip->i_ino);
goto check_pptr;
}
/*
* We couldn't lock the child file. Drop all the locks and try to
* get them again, one at a time.
*/
xchk_iunlock(sc, sc->ilock_flags);
sd->need_revalidate = true;
trace_xchk_dir_ultraslowpath(sc->ip, xname, ip->i_ino);
error = xchk_dir_trylock_for_pptrs(sc, ip, &lockmode);
if (error)
goto out_rele;
/* Revalidate, since we just cycled the locks. */
error = xchk_dir_revalidate_dirent(sd, xname, dirent->ino);
if (error == -ENOENT) {
error = 0;
goto out_unlock;
}
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out_unlock;
check_pptr:
error = xchk_dir_parent_pointer(sd, xname, ip);
out_unlock:
xfs_iunlock(ip, lockmode);
out_rele:
xchk_irele(sc, ip);
return error;
}
/* Check all the dirents that we deferred the first time around. */
STATIC int
xchk_dir_finish_slow_dirents(
struct xchk_dir *sd)
{
xfarray_idx_t array_cur;
int error;
foreach_xfarray_idx(sd->dir_entries, array_cur) {
struct xchk_dirent dirent;
if (sd->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return 0;
error = xfarray_load(sd->dir_entries, array_cur, &dirent);
if (error)
return error;
error = xfblob_loadname(sd->dir_names, dirent.name_cookie,
&sd->xname, dirent.namelen);
if (error)
return error;
error = xchk_dir_slow_dirent(sd, &dirent, &sd->xname);
if (error)
return error;
}
return 0;
}
/* Scrub a whole directory. */
int
xchk_directory(
@ -907,11 +1090,56 @@ xchk_directory(
if (!sd)
return -ENOMEM;
sd->sc = sc;
sd->xname.name = sd->namebuf;
if (xfs_has_parent(sc->mp)) {
char *descr;
/*
* Set up some staging memory for dirents that we can't check
* due to locking contention.
*/
descr = xchk_xfile_ino_descr(sc, "slow directory entries");
error = xfarray_create(descr, 0, sizeof(struct xchk_dirent),
&sd->dir_entries);
kfree(descr);
if (error)
goto out_sd;
descr = xchk_xfile_ino_descr(sc, "slow directory entry names");
error = xfblob_create(descr, &sd->dir_names);
kfree(descr);
if (error)
goto out_entries;
}
/* Look up every name in this directory by hash. */
error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, sd);
if (error == -ECANCELED)
error = 0;
if (error)
goto out_names;
if (xfs_has_parent(sc->mp)) {
error = xchk_dir_finish_slow_dirents(sd);
if (error == -ETIMEDOUT) {
/* Couldn't grab a lock, scrub was marked incomplete */
error = 0;
goto out_names;
}
if (error)
goto out_names;
}
out_names:
if (sd->dir_names)
xfblob_destroy(sd->dir_names);
out_entries:
if (sd->dir_entries)
xfarray_destroy(sd->dir_entries);
out_sd:
kvfree(sd);
if (error && error != -ECANCELED)
if (error)
return error;
/* If the dir is clean, it is clearly not zapped. */

View File

@ -18,6 +18,7 @@
#include "xfs_trans.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/readdir.h"
/* Call a function for every entry in a shortform directory. */
@ -380,3 +381,80 @@ out_check_rval:
*ino = args.inumber;
return error;
}
/*
* Try to grab the IOLOCK and ILOCK of sc->ip and ip, returning @ip's lock
* state. The caller may have a transaction, so we must use trylock for both
* IOLOCKs.
*/
static inline unsigned int
xchk_dir_trylock_both(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
if (!xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
return 0;
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
goto parent_iolock;
xchk_ilock(sc, XFS_ILOCK_EXCL);
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
goto parent_ilock;
return XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL;
parent_ilock:
xchk_iunlock(sc, XFS_ILOCK_EXCL);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
parent_iolock:
xchk_iunlock(sc, XFS_IOLOCK_EXCL);
return 0;
}
/*
* Try for a limited time to grab the IOLOCK and ILOCK of both the scrub target
* (@sc->ip) and the inode at the other end (@ip) of a directory or parent
* pointer link so that we can check that link.
*
* We do not know ahead of time that the directory tree is /not/ corrupt, so we
* cannot use the "lock two inode" functions because we do not know that there
* is not a racing thread trying to take the locks in opposite order. First
* take IOLOCK_EXCL of the scrub target, and then try to take IOLOCK_SHARED
* of @ip to synchronize with the VFS. Next, take ILOCK_EXCL of the scrub
* target and @ip to synchronize with XFS.
*
* If the trylocks succeed, *lockmode will be set to the locks held for @ip;
* @sc->ilock_flags will be set for the locks held for @sc->ip; and zero will
* be returned. If not, returns -EDEADLOCK to try again; or -ETIMEDOUT if
* XCHK_TRY_HARDER was set. Returns -EINTR if the process has been killed.
*/
int
xchk_dir_trylock_for_pptrs(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int *lockmode)
{
unsigned int nr;
int error = 0;
ASSERT(sc->ilock_flags == 0);
for (nr = 0; nr < HZ; nr++) {
*lockmode = xchk_dir_trylock_both(sc, ip);
if (*lockmode)
return 0;
if (xchk_should_terminate(sc, &error))
return error;
delay(1);
}
if (sc->flags & XCHK_TRY_HARDER) {
xchk_set_incomplete(sc);
return -ETIMEDOUT;
}
return -EDEADLOCK;
}

View File

@ -16,4 +16,7 @@ int xchk_dir_walk(struct xfs_scrub *sc, struct xfs_inode *dp,
int xchk_dir_lookup(struct xfs_scrub *sc, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t *ino);
int xchk_dir_trylock_for_pptrs(struct xfs_scrub *sc, struct xfs_inode *ip,
unsigned int *lockmode);
#endif /* __XFS_SCRUB_READDIR_H__ */

View File

@ -1511,6 +1511,40 @@ DEFINE_EVENT(xchk_nlinks_diff_class, name, \
TP_ARGS(mp, ip, live))
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xchk_nlinks_compare_inode);
DECLARE_EVENT_CLASS(xchk_pptr_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name,
xfs_ino_t far_ino),
TP_ARGS(ip, name, far_ino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, namelen)
__dynamic_array(char, name, name->len)
__field(xfs_ino_t, far_ino)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->namelen = name->len;
memcpy(__get_str(name), name, name->len);
__entry->far_ino = far_ino;
),
TP_printk("dev %d:%d ino 0x%llx name '%.*s' far_ino 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->namelen,
__get_str(name),
__entry->far_ino)
)
#define DEFINE_XCHK_PPTR_EVENT(name) \
DEFINE_EVENT(xchk_pptr_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name, \
xfs_ino_t far_ino), \
TP_ARGS(ip, name, far_ino))
DEFINE_XCHK_PPTR_EVENT(xchk_dir_defer);
DEFINE_XCHK_PPTR_EVENT(xchk_dir_slowpath);
DEFINE_XCHK_PPTR_EVENT(xchk_dir_ultraslowpath);
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)