bcachefs: Improve snapshots_seen

This makes the snapshots_seen data structure fsck-private and improves
it: we now also track the equivalence class for each snapshot id we've
seen, which means we can detect when snapshot deletion hasn't finished
or hasn't run correctly (which would otherwise confuse fsck).

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Author: Kent Overstreet, 2022-07-14 02:47:36 -04:00 (committed by Kent Overstreet)
parent 4ab35c34d5, commit 49124d8a7f
2 changed files with 118 additions and 69 deletions
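
The heart of the change is in the first hunk of fs/bcachefs/fsck.c below:
each snapshots_seen entry now records both the snapshot id found in a key
and the equivalence class that id maps to. If two keys at the same btree
position carry different snapshot ids from the same equivalence class, an
unfinished or incorrect snapshot deletion has left duplicate keys behind,
and fsck can now report that instead of getting confused. The stand-alone
C sketch below models just that check; it is illustrative only: seen_update(),
struct seen_entry, and the flat realloc'd array are inventions for the
example, not the kernel's darray-based code.

/* Toy model of the duplicate-equiv check; not the kernel implementation. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

struct seen_entry {
	uint32_t id;	/* snapshot id as it appears in the key */
	uint32_t equiv;	/* equivalence class that id maps to */
};

struct seen {
	struct seen_entry *data;
	size_t nr, size;
};

/* 0 = ok, -1 = duplicate keys detected, -2 = allocation failure */
static int seen_update(struct seen *s, uint32_t id, uint32_t equiv)
{
	for (size_t i = 0; i < s->nr; i++)
		if (s->data[i].equiv == equiv) {
			if (s->data[i].id != id) {
				fprintf(stderr, "duplicate keys: snapshots %u, %u (equiv %u)\n",
					s->data[i].id, id, equiv);
				return -1;
			}
			return 0;	/* same id seen again, e.g. after a lock restart */
		}

	if (s->nr == s->size) {
		size_t n = s->size ? s->size * 2 : 8;
		void *p = realloc(s->data, n * sizeof(*s->data));
		if (!p)
			return -2;
		s->data = p;
		s->size = n;
	}
	s->data[s->nr++] = (struct seen_entry) { .id = id, .equiv = equiv };
	return 0;
}

int main(void)
{
	struct seen s = { 0 };

	seen_update(&s, 4, 2);	/* first key in equivalence class 2: recorded */
	seen_update(&s, 7, 2);	/* different id, same class: flagged */
	free(s.data);
	return 0;
}

In the kernel code that follows, the same list is additionally keyed to a
single btree position: snapshots_seen_update() clears s->ids whenever s->pos
changes, so the duplicate-equiv check only compares keys that share the same
inode and offset.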

fs/bcachefs/fsck.c

@@ -469,19 +469,60 @@ out:
 	return ret;
 }
 
-static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos)
+struct snapshots_seen_entry {
+	u32	id;
+	u32	equiv;
+};
+
+struct snapshots_seen {
+	struct bpos	pos;
+	DARRAY(struct snapshots_seen_entry) ids;
+};
+
+static inline void snapshots_seen_exit(struct snapshots_seen *s)
 {
-	pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
+	darray_exit(&s->ids);
+}
+
+static inline void snapshots_seen_init(struct snapshots_seen *s)
+{
+	memset(s, 0, sizeof(*s));
+}
+
+static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
+				 enum btree_id btree_id, struct bpos pos)
+{
+	struct snapshots_seen_entry *i, n = {
+		.id	= pos.snapshot,
+		.equiv	= bch2_snapshot_equiv(c, pos.snapshot),
+	};
+	int ret;
 
 	if (bkey_cmp(s->pos, pos))
 		s->ids.nr = 0;
+
+	pos.snapshot = n.equiv;
 	s->pos = pos;
 
-	/* Might get called multiple times due to lock restarts */
-	if (s->ids.nr && s->ids.data[s->ids.nr - 1] == pos.snapshot)
-		return 0;
+	darray_for_each(s->ids, i)
+		if (i->equiv == n.equiv) {
+			if (i->id != n.id) {
+				bch_err(c, "snapshot deletion did not run correctly:\n"
+					"  duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
+					bch2_btree_ids[btree_id],
+					pos.inode, pos.offset,
+					i->id, n.id, n.equiv);
+				return -EINVAL;
+			}
 
-	return snapshots_seen_add(c, s, pos.snapshot);
+			return 0;
+		}
+
+	ret = darray_push(&s->ids, n);
+	if (ret)
+		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
+			s->ids.size);
+	return ret;
 }
 
 /**
@@ -494,15 +535,15 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
 				    u32 id, u32 ancestor)
 {
 	ssize_t i;
+	u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0;
 
 	BUG_ON(id > ancestor);
-
-	id = snapshot_t(c, id)->equiv;
-	ancestor = snapshot_t(c, ancestor)->equiv;
+	BUG_ON(!bch2_snapshot_is_equiv(c, id));
+	BUG_ON(!bch2_snapshot_is_equiv(c, ancestor));
 
 	/* @ancestor should be the snapshot most recently added to @seen */
-	BUG_ON(!seen->ids.nr || seen->ids.data[seen->ids.nr - 1] != ancestor);
-	BUG_ON(seen->pos.snapshot != ancestor);
+	BUG_ON(ancestor != seen->pos.snapshot);
+	BUG_ON(ancestor != top);
 
 	if (id == ancestor)
 		return true;
@@ -511,10 +552,10 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see
 		return false;
 
 	for (i = seen->ids.nr - 2;
-	     i >= 0 && seen->ids.data[i] >= id;
+	     i >= 0 && seen->ids.data[i].equiv >= id;
 	     --i)
-		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]) &&
-		    bch2_snapshot_is_ancestor(c, seen->ids.data[i], ancestor))
+		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) &&
+		    bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor))
 			return false;
 
 	return true;
@@ -539,8 +580,9 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s,
 		: bch2_snapshot_is_ancestor(c, src, dst);
 }
 
-#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)		\
-	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && (_i)->snapshot <= (_snapshot); _i++)\
+#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)		\
+	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \
+	     (_i)->snapshot <= (_snapshot); _i++)			\
 		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))
 
 struct inode_walker_entry {
@@ -575,7 +617,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w,
 	return darray_push(&w->inodes, ((struct inode_walker_entry) {
 		.inode		= u,
-		.snapshot	= snapshot_t(c, inode.k->p.snapshot)->equiv,
+		.snapshot	= bch2_snapshot_equiv(c, inode.k->p.snapshot),
 	}));
 }
@@ -585,10 +627,10 @@ static int __walk_inode(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	unsigned i, ancestor_pos;
+	unsigned i;
 	int ret;
 
-	pos.snapshot = snapshot_t(c, pos.snapshot)->equiv;
+	pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot);
 
 	if (pos.inode == w->cur_inum) {
 		w->first_this_inode = false;
@@ -621,17 +663,20 @@ found:
 	BUG_ON(pos.snapshot > w->inodes.data[i].snapshot);
 
 	if (pos.snapshot != w->inodes.data[i].snapshot) {
-		ancestor_pos = i;
+		struct inode_walker_entry e = w->inodes.data[i];
+
+		e.snapshot = pos.snapshot;
+		e.count = 0;
+
+		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u",
+			 pos.inode, pos.snapshot, w->inodes.data[i].snapshot);
 
 		while (i && w->inodes.data[i - 1].snapshot > pos.snapshot)
 			--i;
 
-		ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]);
+		ret = darray_insert_item(&w->inodes, i, e);
 		if (ret)
 			return ret;
-
-		w->inodes.data[i].snapshot = pos.snapshot;
-		w->inodes.data[i].count = 0;
 	}
 
 	return i;
@@ -651,17 +696,19 @@ static int __get_visible_inodes(struct btree_trans *trans,
 
 	for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum),
 			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+		u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
 		if (k.k->p.offset != inum)
 			break;
 
-		if (!bkey_is_inode(k.k))
+		if (!ref_visible(c, s, s->pos.snapshot, equiv))
 			continue;
 
-		if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) {
+		if (bkey_is_inode(k.k))
 			add_inode(c, w, k);
-			if (k.k->p.snapshot >= s->pos.snapshot)
-				break;
-		}
+
+		if (equiv >= s->pos.snapshot)
+			break;
 	}
 	bch2_trans_iter_exit(trans, &iter);
 
@@ -676,7 +723,7 @@ static int check_key_has_snapshot(struct btree_trans *trans,
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
-	if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c,
+	if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
 			"key in missing snapshot: %s",
 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
 		ret = bch2_btree_delete_at(trans, iter,
@@ -784,6 +831,7 @@ fsck_err:
 static int check_inode(struct btree_trans *trans,
 		       struct btree_iter *iter,
 		       struct bch_inode_unpacked *prev,
+		       struct snapshots_seen *s,
 		       bool full)
 {
 	struct bch_fs *c = trans->c;
@@ -806,6 +854,10 @@ static int check_inode(struct btree_trans *trans,
 	if (ret)
 		return 0;
 
+	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
+	if (ret)
+		goto err;
+
 	/*
 	 * if snapshot id isn't a leaf node, skip it - deletion in
 	 * particular is not atomic, so on the internal snapshot nodes
@@ -928,8 +980,10 @@ static int check_inodes(struct bch_fs *c, bool full)
 	struct btree_trans trans;
 	struct btree_iter iter;
 	struct bch_inode_unpacked prev = { 0 };
+	struct snapshots_seen s;
 	int ret;
 
+	snapshots_seen_init(&s);
 	bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
 	bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN,
@@ -941,13 +995,14 @@ static int check_inodes(struct bch_fs *c, bool full)
 		ret = commit_do(&trans, NULL, NULL,
 				BTREE_INSERT_LAZY_RW|
 				BTREE_INSERT_NOFAIL,
-			check_inode(&trans, &iter, &prev, full));
+			check_inode(&trans, &iter, &prev, &s, full));
 		if (ret)
 			break;
 	} while (bch2_btree_iter_advance(&iter));
 	bch2_trans_iter_exit(&trans, &iter);
 
 	bch2_trans_exit(&trans);
+	snapshots_seen_exit(&s);
 	if (ret)
 		bch_err(c, "error %i from check_inodes()", ret);
 	return ret;
@@ -1096,6 +1151,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 	struct bkey_s_c k;
 	struct inode_walker_entry *i;
 	struct printbuf buf = PRINTBUF;
+	struct bpos equiv;
 	int ret = 0;
 peek:
 	k = bch2_btree_iter_peek(iter);
@@ -1112,7 +1168,10 @@ peek:
 		goto out;
 	}
 
-	ret = snapshots_seen_update(c, s, k.k->p);
+	equiv = k.k->p;
+	equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
 	if (ret)
 		goto err;
 
@@ -1147,7 +1206,7 @@ peek:
 		}
 	}
 #endif
-	ret = __walk_inode(trans, inode, k.k->p);
+	ret = __walk_inode(trans, inode, equiv);
 	if (ret < 0)
 		goto err;
 
@@ -1179,8 +1238,8 @@ peek:
 		goto out;
 	}
 
-	if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) {
-		for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) {
+	if (!bch2_snapshot_internal_node(c, equiv.snapshot)) {
+		for_each_visible_inode(c, s, inode, equiv.snapshot, i) {
 			if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
 					k.k->type != KEY_TYPE_reservation &&
 					k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c,
@@ -1189,7 +1248,7 @@ peek:
 				bch2_fs_lazy_rw(c);
 				ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
 						SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9,
-						     k.k->p.snapshot),
+						     equiv.snapshot),
 						POS(k.k->p.inode, U64_MAX),
 						0, NULL) ?: -EINTR;
 				goto out;
@@ -1198,7 +1257,7 @@ peek:
 	}
 
 	if (bkey_extent_is_allocation(k.k))
-		for_each_visible_inode(c, s, inode, k.k->p.snapshot, i)
+		for_each_visible_inode(c, s, inode, equiv.snapshot, i)
 			i->count += k.k->size;
 #if 0
 	bch2_bkey_buf_reassemble(&prev, c, k);
@@ -1433,6 +1492,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
 	struct bkey_s_c_dirent d;
 	struct inode_walker_entry *i;
 	struct printbuf buf = PRINTBUF;
+	struct bpos equiv;
 	int ret = 0;
 peek:
 	k = bch2_btree_iter_peek(iter);
@@ -1449,7 +1509,10 @@ peek:
 		goto out;
 	}
 
-	ret = snapshots_seen_update(c, s, k.k->p);
+	equiv = k.k->p;
+	equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);
+
+	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
 	if (ret)
 		goto err;
 
@@ -1467,7 +1530,7 @@ peek:
 		goto peek;
 	}
 
-	ret = __walk_inode(trans, dir, k.k->p);
+	ret = __walk_inode(trans, dir, equiv);
 	if (ret < 0)
 		goto err;
 
@@ -1567,7 +1630,8 @@ peek:
 			goto err;
 
 		if (fsck_err_on(!target->inodes.nr, c,
-				"dirent points to missing inode:\n%s",
+				"dirent points to missing inode: (equiv %u)\n%s",
+				equiv.snapshot,
 				(printbuf_reset(&buf),
 				 bch2_bkey_val_to_text(&buf, c, k),
 				 buf.buf))) {
@@ -1585,7 +1649,7 @@ peek:
 	}
 
 	if (d.v->d_type == DT_DIR)
-		for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+		for_each_visible_inode(c, s, dir, equiv.snapshot, i)
 			i->count++;
 
 out:
@@ -1841,7 +1905,7 @@ static int check_path(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	int ret = 0;
 
-	snapshot = snapshot_t(c, snapshot)->equiv;
+	snapshot = bch2_snapshot_equiv(c, snapshot);
 	p->nr = 0;
 
 	while (!(inode->bi_inum == BCACHEFS_ROOT_INO &&
@@ -2126,7 +2190,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
 			   BTREE_ITER_INTENT|
 			   BTREE_ITER_PREFETCH|
 			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
-		ret = snapshots_seen_update(c, &s, k.k->p);
+		ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
 		if (ret)
 			break;
 
@@ -2138,7 +2202,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links
 			    d.v->d_type != DT_SUBVOL)
 				inc_link(c, &s, links, range_start, range_end,
 					 le64_to_cpu(d.v->d_inum),
-					 d.k->p.snapshot);
+					 bch2_snapshot_equiv(c, d.k->p.snapshot));
 			break;
 		}
 	}

fs/bcachefs/subvolume.h

@@ -27,6 +27,16 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
 	return snapshot_t(c, id)->parent;
 }
 
+static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
+{
+	return snapshot_t(c, id)->equiv;
+}
+
+static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
+{
+	return id == snapshot_t(c, id)->equiv;
+}
+
 static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id)
 {
 	struct snapshot_t *s = snapshot_t(c, id);
@@ -58,31 +68,6 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances
 	return id == ancestor;
 }
 
-struct snapshots_seen {
-	struct bpos	pos;
-	DARRAY(u32)	ids;
-};
-
-static inline void snapshots_seen_exit(struct snapshots_seen *s)
-{
-	kfree(s->ids.data);
-	s->ids.data = NULL;
-}
-
-static inline void snapshots_seen_init(struct snapshots_seen *s)
-{
-	memset(s, 0, sizeof(*s));
-}
-
-static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id)
-{
-	int ret = darray_push(&s->ids, id);
-	if (ret)
-		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
-			s->ids.size);
-	return ret;
-}
-
 static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id)
 {
 	u32 *i;