|
|
|
@ -0,0 +1,794 @@
|
|
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
|
/*
|
|
|
|
|
* Copyright (C) 2018-2023 Oracle. All Rights Reserved.
|
|
|
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
|
|
|
*/
|
|
|
|
|
#include "xfs.h"
|
|
|
|
|
#include "xfs_fs.h"
|
|
|
|
|
#include "xfs_shared.h"
|
|
|
|
|
#include "xfs_format.h"
|
|
|
|
|
#include "xfs_trans_resv.h"
|
|
|
|
|
#include "xfs_mount.h"
|
|
|
|
|
#include "xfs_defer.h"
|
|
|
|
|
#include "xfs_btree.h"
|
|
|
|
|
#include "xfs_btree_staging.h"
|
|
|
|
|
#include "xfs_inode.h"
|
|
|
|
|
#include "xfs_bit.h"
|
|
|
|
|
#include "xfs_log_format.h"
|
|
|
|
|
#include "xfs_trans.h"
|
|
|
|
|
#include "xfs_sb.h"
|
|
|
|
|
#include "xfs_alloc.h"
|
|
|
|
|
#include "xfs_ialloc.h"
|
|
|
|
|
#include "xfs_rmap.h"
|
|
|
|
|
#include "xfs_rmap_btree.h"
|
|
|
|
|
#include "xfs_refcount.h"
|
|
|
|
|
#include "xfs_refcount_btree.h"
|
|
|
|
|
#include "xfs_error.h"
|
|
|
|
|
#include "xfs_ag.h"
|
|
|
|
|
#include "scrub/xfs_scrub.h"
|
|
|
|
|
#include "scrub/scrub.h"
|
|
|
|
|
#include "scrub/common.h"
|
|
|
|
|
#include "scrub/btree.h"
|
|
|
|
|
#include "scrub/trace.h"
|
|
|
|
|
#include "scrub/repair.h"
|
|
|
|
|
#include "scrub/bitmap.h"
|
|
|
|
|
#include "scrub/agb_bitmap.h"
|
|
|
|
|
#include "scrub/xfile.h"
|
|
|
|
|
#include "scrub/xfarray.h"
|
|
|
|
|
#include "scrub/newbt.h"
|
|
|
|
|
#include "scrub/reap.h"
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Rebuilding the Reference Count Btree
|
|
|
|
|
* ====================================
|
|
|
|
|
*
|
|
|
|
|
* This algorithm is "borrowed" from xfs_repair. Imagine the rmap
|
|
|
|
|
* entries as rectangles representing extents of physical blocks, and
|
|
|
|
|
* that the rectangles can be laid down to allow them to overlap each
|
|
|
|
|
* other; then we know that we must emit a refcnt btree entry wherever
|
|
|
|
|
* the amount of overlap changes, i.e. the emission stimulus is
|
|
|
|
|
* level-triggered:
|
|
|
|
|
*
|
|
|
|
|
* - ---
|
|
|
|
|
* -- ----- ---- --- ------
|
|
|
|
|
* -- ---- ----------- ---- ---------
|
|
|
|
|
* -------------------------------- -----------
|
|
|
|
|
* ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
|
|
|
|
|
* 2 1 23 21 3 43 234 2123 1 01 2 3 0
|
|
|
|
|
*
|
|
|
|
|
* For our purposes, a rmap is a tuple (startblock, len, fileoff, owner).
|
|
|
|
|
*
|
|
|
|
|
* Note that in the actual refcnt btree we don't store the refcount < 2
|
|
|
|
|
* cases because the bnobt tells us which blocks are free; single-use
|
|
|
|
|
* blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
|
|
|
|
|
* supports storing multiple entries covering a given block we could
|
|
|
|
|
* theoretically dispense with the refcntbt and simply count rmaps, but
|
|
|
|
|
* that's inefficient in the (hot) write path, so we'll take the cost of
|
|
|
|
|
* the extra tree to save time. Also there's no guarantee that rmap
|
|
|
|
|
* will be enabled.
|
|
|
|
|
*
|
|
|
|
|
* Given an array of rmaps sorted by physical block number, a starting
|
|
|
|
|
* physical block (sp), a bag to hold rmaps that cover sp, and the next
|
|
|
|
|
* physical block where the level changes (np), we can reconstruct the
|
|
|
|
|
* refcount btree as follows:
|
|
|
|
|
*
|
|
|
|
|
* While there are still unprocessed rmaps in the array,
|
|
|
|
|
* - Set sp to the physical block (pblk) of the next unprocessed rmap.
|
|
|
|
|
* - Add to the bag all rmaps in the array where startblock == sp.
|
|
|
|
|
* - Set np to the physical block where the bag size will change. This
|
|
|
|
|
* is the minimum of (the pblk of the next unprocessed rmap) and
|
|
|
|
|
* (startblock + len of each rmap in the bag).
|
|
|
|
|
* - Record the bag size as old_bag_size.
|
|
|
|
|
*
|
|
|
|
|
* - While the bag isn't empty,
|
|
|
|
|
* - Remove from the bag all rmaps where startblock + len == np.
|
|
|
|
|
* - Add to the bag all rmaps in the array where startblock == np.
|
|
|
|
|
* - If the bag size isn't old_bag_size, store the refcount entry
|
|
|
|
|
* (sp, np - sp, bag_size) in the refcnt btree.
|
|
|
|
|
* - If the bag is empty, break out of the inner loop.
|
|
|
|
|
* - Set old_bag_size to the bag size
|
|
|
|
|
* - Set sp = np.
|
|
|
|
|
* - Set np to the physical block where the bag size will change.
|
|
|
|
|
* This is the minimum of (the pblk of the next unprocessed rmap)
|
|
|
|
|
* and (startblock + len of each rmap in the bag).
|
|
|
|
|
*
|
|
|
|
|
* Like all the other repairers, we make a list of all the refcount
|
|
|
|
|
* records we need, then reinitialize the refcount btree root and
|
|
|
|
|
* insert all the records.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* The only parts of the rmap that we care about for computing refcounts. */
|
|
|
|
|
struct xrep_refc_rmap {
|
|
|
|
|
xfs_agblock_t startblock;
|
|
|
|
|
xfs_extlen_t blockcount;
|
|
|
|
|
} __packed;
|
|
|
|
|
|
|
|
|
|
struct xrep_refc {
|
|
|
|
|
/* refcount extents */
|
|
|
|
|
struct xfarray *refcount_records;
|
|
|
|
|
|
|
|
|
|
/* new refcountbt information */
|
|
|
|
|
struct xrep_newbt new_btree;
|
|
|
|
|
|
|
|
|
|
/* old refcountbt blocks */
|
|
|
|
|
struct xagb_bitmap old_refcountbt_blocks;
|
|
|
|
|
|
|
|
|
|
struct xfs_scrub *sc;
|
|
|
|
|
|
|
|
|
|
/* get_records()'s position in the refcount record array. */
|
|
|
|
|
xfarray_idx_t array_cur;
|
|
|
|
|
|
|
|
|
|
/* # of refcountbt blocks */
|
|
|
|
|
xfs_extlen_t btblocks;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Check for any obvious conflicts with this shared/CoW staging extent. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_check_ext(
|
|
|
|
|
struct xfs_scrub *sc,
|
|
|
|
|
const struct xfs_refcount_irec *rec)
|
|
|
|
|
{
|
|
|
|
|
enum xbtree_recpacking outcome;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
if (xfs_refcount_check_irec(sc->sa.pag, rec) != NULL)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
/* Make sure this isn't free space. */
|
|
|
|
|
error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rc_startblock,
|
|
|
|
|
rec->rc_blockcount, &outcome);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
if (outcome != XBTREE_RECPACKING_EMPTY)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
/* Must not be an inode chunk. */
|
|
|
|
|
error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
|
|
|
|
|
rec->rc_startblock, rec->rc_blockcount, &outcome);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
if (outcome != XBTREE_RECPACKING_EMPTY)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Record a reference count extent. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_stash(
|
|
|
|
|
struct xrep_refc *rr,
|
|
|
|
|
enum xfs_refc_domain domain,
|
|
|
|
|
xfs_agblock_t agbno,
|
|
|
|
|
xfs_extlen_t len,
|
|
|
|
|
uint64_t refcount)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_refcount_irec irec = {
|
|
|
|
|
.rc_startblock = agbno,
|
|
|
|
|
.rc_blockcount = len,
|
|
|
|
|
.rc_domain = domain,
|
|
|
|
|
};
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
|
|
if (xchk_should_terminate(sc, &error))
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
irec.rc_refcount = min_t(uint64_t, MAXREFCOUNT, refcount);
|
|
|
|
|
|
|
|
|
|
error = xrep_refc_check_ext(rr->sc, &irec);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
trace_xrep_refc_found(sc->sa.pag, &irec);
|
|
|
|
|
|
|
|
|
|
return xfarray_append(rr->refcount_records, &irec);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Record a CoW staging extent. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_stash_cow(
|
|
|
|
|
struct xrep_refc *rr,
|
|
|
|
|
xfs_agblock_t agbno,
|
|
|
|
|
xfs_extlen_t len)
|
|
|
|
|
{
|
|
|
|
|
return xrep_refc_stash(rr, XFS_REFC_DOMAIN_COW, agbno, len, 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Decide if an rmap could describe a shared extent. */
|
|
|
|
|
static inline bool
|
|
|
|
|
xrep_refc_rmap_shareable(
|
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
|
const struct xfs_rmap_irec *rmap)
|
|
|
|
|
{
|
|
|
|
|
/* AG metadata are never sharable */
|
|
|
|
|
if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* Metadata in files are never shareable */
|
|
|
|
|
if (xfs_internal_inum(mp, rmap->rm_owner))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
/* Metadata and unwritten file blocks are not shareable. */
|
|
|
|
|
if (rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
|
|
|
|
|
XFS_RMAP_UNWRITTEN))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Walk along the reverse mapping records until we find one that could describe
|
|
|
|
|
* a shared extent.
|
|
|
|
|
*/
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_walk_rmaps(
|
|
|
|
|
struct xrep_refc *rr,
|
|
|
|
|
struct xrep_refc_rmap *rrm,
|
|
|
|
|
bool *have_rec)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_rmap_irec rmap;
|
|
|
|
|
struct xfs_btree_cur *cur = rr->sc->sa.rmap_cur;
|
|
|
|
|
struct xfs_mount *mp = cur->bc_mp;
|
|
|
|
|
int have_gt;
|
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
|
|
*have_rec = false;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Loop through the remaining rmaps. Remember CoW staging
|
|
|
|
|
* extents and the refcountbt blocks from the old tree for later
|
|
|
|
|
* disposal. We can only share written data fork extents, so
|
|
|
|
|
* keep looping until we find an rmap for one.
|
|
|
|
|
*/
|
|
|
|
|
do {
|
|
|
|
|
if (xchk_should_terminate(rr->sc, &error))
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
error = xfs_btree_increment(cur, 0, &have_gt);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
if (!have_gt)
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
error = xfs_rmap_get_rec(cur, &rmap, &have_gt);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
if (XFS_IS_CORRUPT(mp, !have_gt))
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
if (rmap.rm_owner == XFS_RMAP_OWN_COW) {
|
|
|
|
|
error = xrep_refc_stash_cow(rr, rmap.rm_startblock,
|
|
|
|
|
rmap.rm_blockcount);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
} else if (rmap.rm_owner == XFS_RMAP_OWN_REFC) {
|
|
|
|
|
/* refcountbt block, dump it when we're done. */
|
|
|
|
|
rr->btblocks += rmap.rm_blockcount;
|
|
|
|
|
error = xagb_bitmap_set(&rr->old_refcountbt_blocks,
|
|
|
|
|
rmap.rm_startblock, rmap.rm_blockcount);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
} while (!xrep_refc_rmap_shareable(mp, &rmap));
|
|
|
|
|
|
|
|
|
|
rrm->startblock = rmap.rm_startblock;
|
|
|
|
|
rrm->blockcount = rmap.rm_blockcount;
|
|
|
|
|
*have_rec = true;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static inline uint32_t
|
|
|
|
|
xrep_refc_encode_startblock(
|
|
|
|
|
const struct xfs_refcount_irec *irec)
|
|
|
|
|
{
|
|
|
|
|
uint32_t start;
|
|
|
|
|
|
|
|
|
|
start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
|
|
|
|
|
if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
|
|
|
|
|
start |= XFS_REFC_COWFLAG;
|
|
|
|
|
|
|
|
|
|
return start;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sort in the same order as the ondisk records. */
|
|
|
|
|
static int
|
|
|
|
|
xrep_refc_extent_cmp(
|
|
|
|
|
const void *a,
|
|
|
|
|
const void *b)
|
|
|
|
|
{
|
|
|
|
|
const struct xfs_refcount_irec *ap = a;
|
|
|
|
|
const struct xfs_refcount_irec *bp = b;
|
|
|
|
|
uint32_t sa, sb;
|
|
|
|
|
|
|
|
|
|
sa = xrep_refc_encode_startblock(ap);
|
|
|
|
|
sb = xrep_refc_encode_startblock(bp);
|
|
|
|
|
|
|
|
|
|
if (sa > sb)
|
|
|
|
|
return 1;
|
|
|
|
|
if (sa < sb)
|
|
|
|
|
return -1;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Sort the refcount extents by startblock or else the btree records will be in
|
|
|
|
|
* the wrong order. Make sure the records do not overlap in physical space.
|
|
|
|
|
*/
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_sort_records(
|
|
|
|
|
struct xrep_refc *rr)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_refcount_irec irec;
|
|
|
|
|
xfarray_idx_t cur;
|
|
|
|
|
enum xfs_refc_domain dom = XFS_REFC_DOMAIN_SHARED;
|
|
|
|
|
xfs_agblock_t next_agbno = 0;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
error = xfarray_sort(rr->refcount_records, xrep_refc_extent_cmp,
|
|
|
|
|
XFARRAY_SORT_KILLABLE);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
foreach_xfarray_idx(rr->refcount_records, cur) {
|
|
|
|
|
if (xchk_should_terminate(rr->sc, &error))
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
error = xfarray_load(rr->refcount_records, cur, &irec);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
if (dom == XFS_REFC_DOMAIN_SHARED &&
|
|
|
|
|
irec.rc_domain == XFS_REFC_DOMAIN_COW) {
|
|
|
|
|
dom = irec.rc_domain;
|
|
|
|
|
next_agbno = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (dom != irec.rc_domain)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
if (irec.rc_startblock < next_agbno)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
next_agbno = irec.rc_startblock + irec.rc_blockcount;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define RRM_NEXT(r) ((r).startblock + (r).blockcount)
|
|
|
|
|
/*
|
|
|
|
|
* Find the next block where the refcount changes, given the next rmap we
|
|
|
|
|
* looked at and the ones we're already tracking.
|
|
|
|
|
*/
|
|
|
|
|
static inline int
|
|
|
|
|
xrep_refc_next_edge(
|
|
|
|
|
struct xfarray *rmap_bag,
|
|
|
|
|
struct xrep_refc_rmap *next_rrm,
|
|
|
|
|
bool next_valid,
|
|
|
|
|
xfs_agblock_t *nbnop)
|
|
|
|
|
{
|
|
|
|
|
struct xrep_refc_rmap rrm;
|
|
|
|
|
xfarray_idx_t array_cur = XFARRAY_CURSOR_INIT;
|
|
|
|
|
xfs_agblock_t nbno = NULLAGBLOCK;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
if (next_valid)
|
|
|
|
|
nbno = next_rrm->startblock;
|
|
|
|
|
|
|
|
|
|
while ((error = xfarray_iter(rmap_bag, &array_cur, &rrm)) == 1)
|
|
|
|
|
nbno = min_t(xfs_agblock_t, nbno, RRM_NEXT(rrm));
|
|
|
|
|
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We should have found /something/ because either next_rrm is the next
|
|
|
|
|
* interesting rmap to look at after emitting this refcount extent, or
|
|
|
|
|
* there are other rmaps in rmap_bag contributing to the current
|
|
|
|
|
* sharing count. But if something is seriously wrong, bail out.
|
|
|
|
|
*/
|
|
|
|
|
if (nbno == NULLAGBLOCK)
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
*nbnop = nbno;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Walk forward through the rmap btree to collect all rmaps starting at
|
|
|
|
|
* @bno in @rmap_bag. These represent the file(s) that share ownership of
|
|
|
|
|
* the current block. Upon return, the rmap cursor points to the last record
|
|
|
|
|
* satisfying the startblock constraint.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
xrep_refc_push_rmaps_at(
|
|
|
|
|
struct xrep_refc *rr,
|
|
|
|
|
struct xfarray *rmap_bag,
|
|
|
|
|
xfs_agblock_t bno,
|
|
|
|
|
struct xrep_refc_rmap *rrm,
|
|
|
|
|
bool *have,
|
|
|
|
|
uint64_t *stack_sz)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
int have_gt;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
while (*have && rrm->startblock == bno) {
|
|
|
|
|
error = xfarray_store_anywhere(rmap_bag, rrm);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
(*stack_sz)++;
|
|
|
|
|
error = xrep_refc_walk_rmaps(rr, rrm, have);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
error = xfs_btree_decrement(sc->sa.rmap_cur, 0, &have_gt);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
if (XFS_IS_CORRUPT(sc->mp, !have_gt))
|
|
|
|
|
return -EFSCORRUPTED;
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Iterate all the rmap records to generate reference count data. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_find_refcounts(
|
|
|
|
|
struct xrep_refc *rr)
|
|
|
|
|
{
|
|
|
|
|
struct xrep_refc_rmap rrm;
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
struct xfarray *rmap_bag;
|
|
|
|
|
char *descr;
|
|
|
|
|
uint64_t old_stack_sz;
|
|
|
|
|
uint64_t stack_sz = 0;
|
|
|
|
|
xfs_agblock_t sbno;
|
|
|
|
|
xfs_agblock_t cbno;
|
|
|
|
|
xfs_agblock_t nbno;
|
|
|
|
|
bool have;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
xrep_ag_btcur_init(sc, &sc->sa);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set up a sparse array to store all the rmap records that we're
|
|
|
|
|
* tracking to generate a reference count record. If this exceeds
|
|
|
|
|
* MAXREFCOUNT, we clamp rc_refcount.
|
|
|
|
|
*/
|
|
|
|
|
descr = xchk_xfile_ag_descr(sc, "rmap record bag");
|
|
|
|
|
error = xfarray_create(descr, 0, sizeof(struct xrep_refc_rmap),
|
|
|
|
|
&rmap_bag);
|
|
|
|
|
kfree(descr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_cur;
|
|
|
|
|
|
|
|
|
|
/* Start the rmapbt cursor to the left of all records. */
|
|
|
|
|
error = xfs_btree_goto_left_edge(sc->sa.rmap_cur);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
|
|
|
|
|
/* Process reverse mappings into refcount data. */
|
|
|
|
|
while (xfs_btree_has_more_records(sc->sa.rmap_cur)) {
|
|
|
|
|
/* Push all rmaps with pblk == sbno onto the stack */
|
|
|
|
|
error = xrep_refc_walk_rmaps(rr, &rrm, &have);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
if (!have)
|
|
|
|
|
break;
|
|
|
|
|
sbno = cbno = rrm.startblock;
|
|
|
|
|
error = xrep_refc_push_rmaps_at(rr, rmap_bag, sbno,
|
|
|
|
|
&rrm, &have, &stack_sz);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
|
|
|
|
|
/* Set nbno to the bno of the next refcount change */
|
|
|
|
|
error = xrep_refc_next_edge(rmap_bag, &rrm, have, &nbno);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
|
|
|
|
|
ASSERT(nbno > sbno);
|
|
|
|
|
old_stack_sz = stack_sz;
|
|
|
|
|
|
|
|
|
|
/* While stack isn't empty... */
|
|
|
|
|
while (stack_sz) {
|
|
|
|
|
xfarray_idx_t array_cur = XFARRAY_CURSOR_INIT;
|
|
|
|
|
|
|
|
|
|
/* Pop all rmaps that end at nbno */
|
|
|
|
|
while ((error = xfarray_iter(rmap_bag, &array_cur,
|
|
|
|
|
&rrm)) == 1) {
|
|
|
|
|
if (RRM_NEXT(rrm) != nbno)
|
|
|
|
|
continue;
|
|
|
|
|
error = xfarray_unset(rmap_bag, array_cur - 1);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
stack_sz--;
|
|
|
|
|
}
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
|
|
|
|
|
/* Push array items that start at nbno */
|
|
|
|
|
error = xrep_refc_walk_rmaps(rr, &rrm, &have);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
if (have) {
|
|
|
|
|
error = xrep_refc_push_rmaps_at(rr, rmap_bag,
|
|
|
|
|
nbno, &rrm, &have, &stack_sz);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Emit refcount if necessary */
|
|
|
|
|
ASSERT(nbno > cbno);
|
|
|
|
|
if (stack_sz != old_stack_sz) {
|
|
|
|
|
if (old_stack_sz > 1) {
|
|
|
|
|
error = xrep_refc_stash(rr,
|
|
|
|
|
XFS_REFC_DOMAIN_SHARED,
|
|
|
|
|
cbno, nbno - cbno,
|
|
|
|
|
old_stack_sz);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
}
|
|
|
|
|
cbno = nbno;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Stack empty, go find the next rmap */
|
|
|
|
|
if (stack_sz == 0)
|
|
|
|
|
break;
|
|
|
|
|
old_stack_sz = stack_sz;
|
|
|
|
|
sbno = nbno;
|
|
|
|
|
|
|
|
|
|
/* Set nbno to the bno of the next refcount change */
|
|
|
|
|
error = xrep_refc_next_edge(rmap_bag, &rrm, have,
|
|
|
|
|
&nbno);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bag;
|
|
|
|
|
|
|
|
|
|
ASSERT(nbno > sbno);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ASSERT(stack_sz == 0);
|
|
|
|
|
out_bag:
|
|
|
|
|
xfarray_destroy(rmap_bag);
|
|
|
|
|
out_cur:
|
|
|
|
|
xchk_ag_btcur_free(&sc->sa);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
#undef RRM_NEXT
|
|
|
|
|
|
|
|
|
|
/* Retrieve refcountbt data for bulk load. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_get_records(
|
|
|
|
|
struct xfs_btree_cur *cur,
|
|
|
|
|
unsigned int idx,
|
|
|
|
|
struct xfs_btree_block *block,
|
|
|
|
|
unsigned int nr_wanted,
|
|
|
|
|
void *priv)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
|
|
|
|
|
struct xrep_refc *rr = priv;
|
|
|
|
|
union xfs_btree_rec *block_rec;
|
|
|
|
|
unsigned int loaded;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
|
|
|
|
|
error = xfarray_load(rr->refcount_records, rr->array_cur++,
|
|
|
|
|
irec);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
block_rec = xfs_btree_rec_addr(cur, idx, block);
|
|
|
|
|
cur->bc_ops->init_rec_from_cur(cur, block_rec);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return loaded;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Feed one of the new btree blocks to the bulk loader. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_claim_block(
|
|
|
|
|
struct xfs_btree_cur *cur,
|
|
|
|
|
union xfs_btree_ptr *ptr,
|
|
|
|
|
void *priv)
|
|
|
|
|
{
|
|
|
|
|
struct xrep_refc *rr = priv;
|
|
|
|
|
|
|
|
|
|
return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Update the AGF counters. */
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_reset_counters(
|
|
|
|
|
struct xrep_refc *rr)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
struct xfs_perag *pag = sc->sa.pag;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* After we commit the new btree to disk, it is possible that the
|
|
|
|
|
* process to reap the old btree blocks will race with the AIL trying
|
|
|
|
|
* to checkpoint the old btree blocks into the filesystem. If the new
|
|
|
|
|
* tree is shorter than the old one, the refcountbt write verifier will
|
|
|
|
|
* fail and the AIL will shut down the filesystem.
|
|
|
|
|
*
|
|
|
|
|
* To avoid this, save the old incore btree height values as the alt
|
|
|
|
|
* height values before re-initializing the perag info from the updated
|
|
|
|
|
* AGF to capture all the new values.
|
|
|
|
|
*/
|
|
|
|
|
pag->pagf_repair_refcount_level = pag->pagf_refcount_level;
|
|
|
|
|
|
|
|
|
|
/* Reinitialize with the values we just logged. */
|
|
|
|
|
return xrep_reinit_pagf(sc);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Use the collected refcount information to stage a new refcount btree. If
|
|
|
|
|
* this is successful we'll return with the new btree root information logged
|
|
|
|
|
* to the repair transaction but not yet committed.
|
|
|
|
|
*/
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_build_new_tree(
|
|
|
|
|
struct xrep_refc *rr)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
struct xfs_btree_cur *refc_cur;
|
|
|
|
|
struct xfs_perag *pag = sc->sa.pag;
|
|
|
|
|
xfs_fsblock_t fsbno;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
error = xrep_refc_sort_records(rr);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Prepare to construct the new btree by reserving disk space for the
|
|
|
|
|
* new btree and setting up all the accounting information we'll need
|
|
|
|
|
* to root the new btree while it's under construction and before we
|
|
|
|
|
* attach it to the AG header.
|
|
|
|
|
*/
|
|
|
|
|
fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
|
|
|
|
|
xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
|
|
|
|
|
XFS_AG_RESV_METADATA);
|
|
|
|
|
rr->new_btree.bload.get_records = xrep_refc_get_records;
|
|
|
|
|
rr->new_btree.bload.claim_block = xrep_refc_claim_block;
|
|
|
|
|
|
|
|
|
|
/* Compute how many blocks we'll need. */
|
|
|
|
|
refc_cur = xfs_refcountbt_stage_cursor(sc->mp, &rr->new_btree.afake,
|
|
|
|
|
pag);
|
|
|
|
|
error = xfs_btree_bload_compute_geometry(refc_cur,
|
|
|
|
|
&rr->new_btree.bload,
|
|
|
|
|
xfarray_length(rr->refcount_records));
|
|
|
|
|
if (error)
|
|
|
|
|
goto err_cur;
|
|
|
|
|
|
|
|
|
|
/* Last chance to abort before we start committing fixes. */
|
|
|
|
|
if (xchk_should_terminate(sc, &error))
|
|
|
|
|
goto err_cur;
|
|
|
|
|
|
|
|
|
|
/* Reserve the space we'll need for the new btree. */
|
|
|
|
|
error = xrep_newbt_alloc_blocks(&rr->new_btree,
|
|
|
|
|
rr->new_btree.bload.nr_blocks);
|
|
|
|
|
if (error)
|
|
|
|
|
goto err_cur;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Due to btree slack factors, it's possible for a new btree to be one
|
|
|
|
|
* level taller than the old btree. Update the incore btree height so
|
|
|
|
|
* that we don't trip the verifiers when writing the new btree blocks
|
|
|
|
|
* to disk.
|
|
|
|
|
*/
|
|
|
|
|
pag->pagf_repair_refcount_level = rr->new_btree.bload.btree_height;
|
|
|
|
|
|
|
|
|
|
/* Add all observed refcount records. */
|
|
|
|
|
rr->array_cur = XFARRAY_CURSOR_INIT;
|
|
|
|
|
error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto err_level;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Install the new btree in the AG header. After this point the old
|
|
|
|
|
* btree is no longer accessible and the new tree is live.
|
|
|
|
|
*/
|
|
|
|
|
xfs_refcountbt_commit_staged_btree(refc_cur, sc->tp, sc->sa.agf_bp);
|
|
|
|
|
xfs_btree_del_cursor(refc_cur, 0);
|
|
|
|
|
|
|
|
|
|
/* Reset the AGF counters now that we've changed the btree shape. */
|
|
|
|
|
error = xrep_refc_reset_counters(rr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto err_newbt;
|
|
|
|
|
|
|
|
|
|
/* Dispose of any unused blocks and the accounting information. */
|
|
|
|
|
error = xrep_newbt_commit(&rr->new_btree);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
return xrep_roll_ag_trans(sc);
|
|
|
|
|
|
|
|
|
|
err_level:
|
|
|
|
|
pag->pagf_repair_refcount_level = 0;
|
|
|
|
|
err_cur:
|
|
|
|
|
xfs_btree_del_cursor(refc_cur, error);
|
|
|
|
|
err_newbt:
|
|
|
|
|
xrep_newbt_cancel(&rr->new_btree);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now that we've logged the roots of the new btrees, invalidate all of the
|
|
|
|
|
* old blocks and free them.
|
|
|
|
|
*/
|
|
|
|
|
STATIC int
|
|
|
|
|
xrep_refc_remove_old_tree(
|
|
|
|
|
struct xrep_refc *rr)
|
|
|
|
|
{
|
|
|
|
|
struct xfs_scrub *sc = rr->sc;
|
|
|
|
|
struct xfs_perag *pag = sc->sa.pag;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
/* Free the old refcountbt blocks if they're not in use. */
|
|
|
|
|
error = xrep_reap_agblocks(sc, &rr->old_refcountbt_blocks,
|
|
|
|
|
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
|
|
|
|
|
if (error)
|
|
|
|
|
return error;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now that we've zapped all the old refcountbt blocks we can turn off
|
|
|
|
|
* the alternate height mechanism and reset the per-AG space
|
|
|
|
|
* reservations.
|
|
|
|
|
*/
|
|
|
|
|
pag->pagf_repair_refcount_level = 0;
|
|
|
|
|
sc->flags |= XREP_RESET_PERAG_RESV;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Rebuild the refcount btree. */
|
|
|
|
|
int
|
|
|
|
|
xrep_refcountbt(
|
|
|
|
|
struct xfs_scrub *sc)
|
|
|
|
|
{
|
|
|
|
|
struct xrep_refc *rr;
|
|
|
|
|
struct xfs_mount *mp = sc->mp;
|
|
|
|
|
char *descr;
|
|
|
|
|
int error;
|
|
|
|
|
|
|
|
|
|
/* We require the rmapbt to rebuild anything. */
|
|
|
|
|
if (!xfs_has_rmapbt(mp))
|
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
|
|
rr = kzalloc(sizeof(struct xrep_refc), XCHK_GFP_FLAGS);
|
|
|
|
|
if (!rr)
|
|
|
|
|
return -ENOMEM;
|
|
|
|
|
rr->sc = sc;
|
|
|
|
|
|
|
|
|
|
/* Set up enough storage to handle one refcount record per block. */
|
|
|
|
|
descr = xchk_xfile_ag_descr(sc, "reference count records");
|
|
|
|
|
error = xfarray_create(descr, mp->m_sb.sb_agblocks,
|
|
|
|
|
sizeof(struct xfs_refcount_irec),
|
|
|
|
|
&rr->refcount_records);
|
|
|
|
|
kfree(descr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_rr;
|
|
|
|
|
|
|
|
|
|
/* Collect all reference counts. */
|
|
|
|
|
xagb_bitmap_init(&rr->old_refcountbt_blocks);
|
|
|
|
|
error = xrep_refc_find_refcounts(rr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bitmap;
|
|
|
|
|
|
|
|
|
|
/* Rebuild the refcount information. */
|
|
|
|
|
error = xrep_refc_build_new_tree(rr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bitmap;
|
|
|
|
|
|
|
|
|
|
/* Kill the old tree. */
|
|
|
|
|
error = xrep_refc_remove_old_tree(rr);
|
|
|
|
|
if (error)
|
|
|
|
|
goto out_bitmap;
|
|
|
|
|
|
|
|
|
|
out_bitmap:
|
|
|
|
|
xagb_bitmap_destroy(&rr->old_refcountbt_blocks);
|
|
|
|
|
xfarray_destroy(rr->refcount_records);
|
|
|
|
|
out_rr:
|
|
|
|
|
kfree(rr);
|
|
|
|
|
return error;
|
|
|
|
|
}
|