mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 05:32:00 +00:00
150bb10a28
generic/388 has an annoying tendency to fail like this during log recovery: XFS (sda4): Unmounting Filesystem 435fe39b-82b6-46ef-be56-819499585130 XFS (sda4): Mounting V5 Filesystem 435fe39b-82b6-46ef-be56-819499585130 XFS (sda4): Starting recovery (logdev: internal) 00000000: 49 4e 81 b6 03 02 00 00 00 00 00 07 00 00 00 07 IN.............. 00000010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 10 ................ 00000020: 35 9a 8b c1 3e 6e 81 00 35 9a 8b c1 3f dc b7 00 5...>n..5...?... 00000030: 35 9a 8b c1 3f dc b7 00 00 00 00 00 00 3c 86 4f 5...?........<.O 00000040: 00 00 00 00 00 00 02 f3 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 1f 01 00 00 00 00 00 00 00 02 b2 74 c9 0b .............t.. 00000060: ff ff ff ff d7 45 73 10 00 00 00 00 00 00 00 2d .....Es........- 00000070: 00 00 07 92 00 01 fe 30 00 00 00 00 00 00 00 1a .......0........ 00000080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000090: 35 9a 8b c1 3b 55 0c 00 00 00 00 00 04 27 b2 d1 5...;U.......'.. 000000a0: 43 5f e3 9b 82 b6 46 ef be 56 81 94 99 58 51 30 C_....F..V...XQ0 XFS (sda4): Internal error Bad dinode after recovery at line 539 of file fs/xfs/xfs_inode_item_recover.c. 
Caller xlog_recover_items_pass2+0x4e/0xc0 [xfs] CPU: 0 PID: 2189311 Comm: mount Not tainted 6.9.0-rc4-djwx #rc4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20171121_152543-x86-ol7-builder-01.us.oracle.com-4.el7.1 04/01/2014 Call Trace: <TASK> dump_stack_lvl+0x4f/0x60 xfs_corruption_error+0x90/0xa0 xlog_recover_inode_commit_pass2+0x5f1/0xb00 xlog_recover_items_pass2+0x4e/0xc0 xlog_recover_commit_trans+0x2db/0x350 xlog_recovery_process_trans+0xab/0xe0 xlog_recover_process_data+0xa7/0x130 xlog_do_recovery_pass+0x398/0x840 xlog_do_log_recovery+0x62/0xc0 xlog_do_recover+0x34/0x1d0 xlog_recover+0xe9/0x1a0 xfs_log_mount+0xff/0x260 xfs_mountfs+0x5d9/0xb60 xfs_fs_fill_super+0x76b/0xa30 get_tree_bdev+0x124/0x1d0 vfs_get_tree+0x17/0xa0 path_mount+0x72b/0xa90 __x64_sys_mount+0x112/0x150 do_syscall_64+0x49/0x100 entry_SYSCALL_64_after_hwframe+0x4b/0x53 </TASK> XFS (sda4): Corruption detected. Unmount and run xfs_repair XFS (sda4): Metadata corruption detected at xfs_dinode_verify.part.0+0x739/0x920 [xfs], inode 0x427b2d1 XFS (sda4): Filesystem has been shut down due to log error (0x2). XFS (sda4): Please unmount the filesystem and rectify the problem(s). XFS (sda4): log mount/recovery failed: error -117 XFS (sda4): log mount failed This is inode log item recovery failing the dinode verifier after replaying the contents of the inode log item into the ondisk inode. Looking back into what the kernel was doing at the time of the fs shutdown, a thread was in the middle of running a series of transactions, each of which committed changes to the inode. At some point in the middle of that chain, an invalid (at least according to the verifier) change was committed. Had the filesystem not shut down in the middle of the chain, a subsequent transaction would have corrected the invalid state and nobody would have noticed. But that's not what happened here. Instead, the invalid inode state was committed to the ondisk log, so log recovery tripped over it. 
The actual defect here was an overzealous inode verifier, which was fixed in a separate patch. This patch adds some transaction precommit functions for CONFIG_XFS_DEBUG=y mode so that we can detect these kinds of transient errors at transaction commit time, where it's much easier to find the root cause. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
252 lines
5.9 KiB
C
252 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000-2003 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_quota.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_buf_item.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_qm.h"
|
|
#include "xfs_log.h"
|
|
#include "xfs_error.h"
|
|
|
|
static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
|
|
{
|
|
return container_of(lip, struct xfs_dq_logitem, qli_item);
|
|
}
|
|
|
|
/*
|
|
* returns the number of iovecs needed to log the given dquot item.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_size(
|
|
struct xfs_log_item *lip,
|
|
int *nvecs,
|
|
int *nbytes)
|
|
{
|
|
*nvecs += 2;
|
|
*nbytes += sizeof(struct xfs_dq_logformat) +
|
|
sizeof(struct xfs_disk_dquot);
|
|
}
|
|
|
|
/*
|
|
* fills in the vector of log iovecs for the given dquot log item.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_format(
|
|
struct xfs_log_item *lip,
|
|
struct xfs_log_vec *lv)
|
|
{
|
|
struct xfs_disk_dquot ddq;
|
|
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
|
|
struct xfs_log_iovec *vecp = NULL;
|
|
struct xfs_dq_logformat *qlf;
|
|
|
|
qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT);
|
|
qlf->qlf_type = XFS_LI_DQUOT;
|
|
qlf->qlf_size = 2;
|
|
qlf->qlf_id = qlip->qli_dquot->q_id;
|
|
qlf->qlf_blkno = qlip->qli_dquot->q_blkno;
|
|
qlf->qlf_len = 1;
|
|
qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset;
|
|
xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat));
|
|
|
|
xfs_dquot_to_disk(&ddq, qlip->qli_dquot);
|
|
|
|
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, &ddq,
|
|
sizeof(struct xfs_disk_dquot));
|
|
}
|
|
|
|
/*
|
|
* Increment the pin count of the given dquot.
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_pin(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
atomic_inc(&dqp->q_pincount);
|
|
}
|
|
|
|
/*
|
|
* Decrement the pin count of the given dquot, and wake up
|
|
* anyone in xfs_dqwait_unpin() if the count goes to 0. The
|
|
* dquot must have been previously pinned with a call to
|
|
* xfs_qm_dquot_logitem_pin().
|
|
*/
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_unpin(
|
|
struct xfs_log_item *lip,
|
|
int remove)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(atomic_read(&dqp->q_pincount) > 0);
|
|
if (atomic_dec_and_test(&dqp->q_pincount))
|
|
wake_up(&dqp->q_pinwait);
|
|
}
|
|
|
|
/*
|
|
* This is called to wait for the given dquot to be unpinned.
|
|
* Most of these pin/unpin routines are plagiarized from inode code.
|
|
*/
|
|
void
|
|
xfs_qm_dqunpin_wait(
|
|
struct xfs_dquot *dqp)
|
|
{
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
if (atomic_read(&dqp->q_pincount) == 0)
|
|
return;
|
|
|
|
/*
|
|
* Give the log a push so we don't wait here too long.
|
|
*/
|
|
xfs_log_force(dqp->q_mount, 0);
|
|
wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
|
|
}
|
|
|
|
STATIC uint
|
|
xfs_qm_dquot_logitem_push(
|
|
struct xfs_log_item *lip,
|
|
struct list_head *buffer_list)
|
|
__releases(&lip->li_ailp->ail_lock)
|
|
__acquires(&lip->li_ailp->ail_lock)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
struct xfs_buf *bp = lip->li_buf;
|
|
uint rval = XFS_ITEM_SUCCESS;
|
|
int error;
|
|
|
|
if (atomic_read(&dqp->q_pincount) > 0)
|
|
return XFS_ITEM_PINNED;
|
|
|
|
if (!xfs_dqlock_nowait(dqp))
|
|
return XFS_ITEM_LOCKED;
|
|
|
|
/*
|
|
* Re-check the pincount now that we stabilized the value by
|
|
* taking the quota lock.
|
|
*/
|
|
if (atomic_read(&dqp->q_pincount) > 0) {
|
|
rval = XFS_ITEM_PINNED;
|
|
goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
* Someone else is already flushing the dquot. Nothing we can do
|
|
* here but wait for the flush to finish and remove the item from
|
|
* the AIL.
|
|
*/
|
|
if (!xfs_dqflock_nowait(dqp)) {
|
|
rval = XFS_ITEM_FLUSHING;
|
|
goto out_unlock;
|
|
}
|
|
|
|
spin_unlock(&lip->li_ailp->ail_lock);
|
|
|
|
error = xfs_qm_dqflush(dqp, &bp);
|
|
if (!error) {
|
|
if (!xfs_buf_delwri_queue(bp, buffer_list))
|
|
rval = XFS_ITEM_FLUSHING;
|
|
xfs_buf_relse(bp);
|
|
} else if (error == -EAGAIN)
|
|
rval = XFS_ITEM_LOCKED;
|
|
|
|
spin_lock(&lip->li_ailp->ail_lock);
|
|
out_unlock:
|
|
xfs_dqunlock(dqp);
|
|
return rval;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_release(
|
|
struct xfs_log_item *lip)
|
|
{
|
|
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
|
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
|
|
/*
|
|
* dquots are never 'held' from getting unlocked at the end of
|
|
* a transaction. Their locking and unlocking is hidden inside the
|
|
* transaction layer, within trans_commit. Hence, no LI_HOLD flag
|
|
* for the logitem.
|
|
*/
|
|
xfs_dqunlock(dqp);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_qm_dquot_logitem_committing(
|
|
struct xfs_log_item *lip,
|
|
xfs_csn_t seq)
|
|
{
|
|
return xfs_qm_dquot_logitem_release(lip);
|
|
}
|
|
|
|
#ifdef DEBUG_EXPENSIVE
/*
 * Debug-only precommit hook: run the dquot verifier on the in-core dquot
 * before it is logged.
 *
 * The dquot is converted into a zero-initialised struct xfs_disk_dquot and
 * passed to xfs_dquot_verify().  If the verifier reports a failure address,
 * the corruption is reported, the filesystem is shut down with
 * SHUTDOWN_CORRUPT_INCORE, and the trailing ASSERT trips.
 *
 * Always returns 0.  @tp is not used.
 */
static int
xfs_qm_dquot_logitem_precommit(
	struct xfs_trans	*tp,
	struct xfs_log_item	*lip)
{
	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip);
	struct xfs_dquot	*dq = qlip->qli_dquot;
	struct xfs_mount	*mp = dq->q_mount;
	struct xfs_disk_dquot	disk_dq = { };
	xfs_failaddr_t		fa;

	xfs_dquot_to_disk(&disk_dq, dq);
	fa = xfs_dquot_verify(mp, &disk_dq, dq->q_id);
	if (!fa)
		return 0;

	XFS_CORRUPTION_ERROR("Bad dquot during logging",
			XFS_ERRLEVEL_LOW, mp, &disk_dq, sizeof(disk_dq));
	xfs_alert(mp,
		"Metadata corruption detected at %pS, dquot 0x%x",
		fa, dq->q_id);
	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	ASSERT(fa == NULL);

	return 0;
}
#else
/* Without DEBUG_EXPENSIVE the item ops carry no precommit hook. */
# define xfs_qm_dquot_logitem_precommit	NULL
#endif
|
|
|
|
static const struct xfs_item_ops xfs_dquot_item_ops = {
|
|
.iop_size = xfs_qm_dquot_logitem_size,
|
|
.iop_precommit = xfs_qm_dquot_logitem_precommit,
|
|
.iop_format = xfs_qm_dquot_logitem_format,
|
|
.iop_pin = xfs_qm_dquot_logitem_pin,
|
|
.iop_unpin = xfs_qm_dquot_logitem_unpin,
|
|
.iop_release = xfs_qm_dquot_logitem_release,
|
|
.iop_committing = xfs_qm_dquot_logitem_committing,
|
|
.iop_push = xfs_qm_dquot_logitem_push,
|
|
};
|
|
|
|
/*
|
|
* Initialize the dquot log item for a newly allocated dquot.
|
|
* The dquot isn't locked at this point, but it isn't on any of the lists
|
|
* either, so we don't care.
|
|
*/
|
|
void
|
|
xfs_qm_dquot_logitem_init(
|
|
struct xfs_dquot *dqp)
|
|
{
|
|
struct xfs_dq_logitem *lp = &dqp->q_logitem;
|
|
|
|
xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
|
|
&xfs_dquot_item_ops);
|
|
lp->qli_dquot = dqp;
|
|
}
|