diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index eda6a6ac3c6e..1cb5787f5a6c 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -469,19 +469,60 @@ out: return ret; } -static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, struct bpos pos) +struct snapshots_seen_entry { + u32 id; + u32 equiv; +}; + +struct snapshots_seen { + struct bpos pos; + DARRAY(struct snapshots_seen_entry) ids; +}; + +static inline void snapshots_seen_exit(struct snapshots_seen *s) { - pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; + darray_exit(&s->ids); +} + +static inline void snapshots_seen_init(struct snapshots_seen *s) +{ + memset(s, 0, sizeof(*s)); +} + +static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, + enum btree_id btree_id, struct bpos pos) +{ + struct snapshots_seen_entry *i, n = { + .id = pos.snapshot, + .equiv = bch2_snapshot_equiv(c, pos.snapshot), + }; + int ret; if (bkey_cmp(s->pos, pos)) s->ids.nr = 0; + + pos.snapshot = n.equiv; s->pos = pos; - /* Might get called multiple times due to lock restarts */ - if (s->ids.nr && s->ids.data[s->ids.nr - 1] == pos.snapshot) - return 0; + darray_for_each(s->ids, i) + if (i->equiv == n.equiv) { + if (i->id != n.id) { + bch_err(c, "snapshot deletion did not run correctly:\n" + " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", + bch2_btree_ids[btree_id], + pos.inode, pos.offset, + i->id, n.id, n.equiv); + return -EINVAL; + } - return snapshots_seen_add(c, s, pos.snapshot); + return 0; + } + + ret = darray_push(&s->ids, n); + if (ret) + bch_err(c, "error reallocating snapshots_seen table (size %zu)", + s->ids.size); + return ret; } /** @@ -494,15 +535,15 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see u32 id, u32 ancestor) { ssize_t i; + u32 top = seen->ids.nr ? seen->ids.data[seen->ids.nr - 1].equiv : 0; BUG_ON(id > ancestor); - - id = snapshot_t(c, id)->equiv; - ancestor = snapshot_t(c, ancestor)->equiv; + BUG_ON(!bch2_snapshot_is_equiv(c, id)); + BUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); /* @ancestor should be the snapshot most recently added to @seen */ - BUG_ON(!seen->ids.nr || seen->ids.data[seen->ids.nr - 1] != ancestor); - BUG_ON(seen->pos.snapshot != ancestor); + BUG_ON(ancestor != seen->pos.snapshot); + BUG_ON(ancestor != top); if (id == ancestor) return true; @@ -511,10 +552,10 @@ static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *see return false; for (i = seen->ids.nr - 2; - i >= 0 && seen->ids.data[i] >= id; + i >= 0 && seen->ids.data[i].equiv >= id; --i) - if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]) && - bch2_snapshot_is_ancestor(c, seen->ids.data[i], ancestor)) + if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv) && + bch2_snapshot_is_ancestor(c, seen->ids.data[i].equiv, ancestor)) return false; return true; @@ -539,8 +580,9 @@ static int ref_visible(struct bch_fs *c, struct snapshots_seen *s, : bch2_snapshot_is_ancestor(c, src, dst); } -#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ - for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && (_i)->snapshot <= (_snapshot); _i++)\ +#define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ + for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ + (_i)->snapshot <= (_snapshot); _i++) \ if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) struct inode_walker_entry { @@ -575,7 +617,7 @@ static int add_inode(struct bch_fs *c, struct inode_walker *w, return darray_push(&w->inodes, ((struct inode_walker_entry) { .inode = u, - .snapshot = snapshot_t(c, inode.k->p.snapshot)->equiv, + .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot), })); } @@ -585,10 +627,10 @@ static int __walk_inode(struct btree_trans *trans, struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; - unsigned i, ancestor_pos; + unsigned i; int ret; - pos.snapshot = snapshot_t(c, pos.snapshot)->equiv; + pos.snapshot = bch2_snapshot_equiv(c, pos.snapshot); if (pos.inode == w->cur_inum) { w->first_this_inode = false; @@ -621,17 +663,20 @@ found: BUG_ON(pos.snapshot > w->inodes.data[i].snapshot); if (pos.snapshot != w->inodes.data[i].snapshot) { - ancestor_pos = i; + struct inode_walker_entry e = w->inodes.data[i]; + + e.snapshot = pos.snapshot; + e.count = 0; + + bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u", + pos.inode, pos.snapshot, w->inodes.data[i].snapshot); while (i && w->inodes.data[i - 1].snapshot > pos.snapshot) --i; - ret = darray_insert_item(&w->inodes, i, w->inodes.data[ancestor_pos]); + ret = darray_insert_item(&w->inodes, i, e); if (ret) return ret; - - w->inodes.data[i].snapshot = pos.snapshot; - w->inodes.data[i].count = 0; } return i; @@ -651,17 +696,19 @@ static int __get_visible_inodes(struct btree_trans *trans, for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, inum), BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); + if (k.k->p.offset != inum) break; - if (!bkey_is_inode(k.k)) + if (!ref_visible(c, s, s->pos.snapshot, equiv)) continue; - if (ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) { + if (bkey_is_inode(k.k)) add_inode(c, w, k); - if (k.k->p.snapshot >= s->pos.snapshot) - break; - } + + if (equiv >= s->pos.snapshot) + break; } bch2_trans_iter_exit(trans, &iter); @@ -676,7 +723,7 @@ static int check_key_has_snapshot(struct btree_trans *trans, struct printbuf buf = PRINTBUF; int ret = 0; - if (mustfix_fsck_err_on(!snapshot_t(c, k.k->p.snapshot)->equiv, c, + if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, "key in missing snapshot: %s", (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) ret = bch2_btree_delete_at(trans, iter, @@ -784,6 +831,7 @@ fsck_err: static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *prev, + struct snapshots_seen *s, bool full) { struct bch_fs *c = trans->c; @@ -806,6 +854,10 @@ static int check_inode(struct btree_trans *trans, if (ret) return 0; + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); + if (ret) + goto err; + /* * if snapshot id isn't a leaf node, skip it - deletion in * particular is not atomic, so on the internal snapshot nodes @@ -928,8 +980,10 @@ static int check_inodes(struct bch_fs *c, bool full) struct btree_trans trans; struct btree_iter iter; struct bch_inode_unpacked prev = { 0 }; + struct snapshots_seen s; int ret; + snapshots_seen_init(&s); bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0); bch2_trans_iter_init(&trans, &iter, BTREE_ID_inodes, POS_MIN, @@ -941,13 +995,14 @@ static int check_inodes(struct bch_fs *c, bool full) ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_LAZY_RW| BTREE_INSERT_NOFAIL, - check_inode(&trans, &iter, &prev, full)); + check_inode(&trans, &iter, &prev, &s, full)); if (ret) break; } while (bch2_btree_iter_advance(&iter)); bch2_trans_iter_exit(&trans, &iter); bch2_trans_exit(&trans); + snapshots_seen_exit(&s); if (ret) bch_err(c, "error %i from check_inodes()", ret); return ret; @@ -1096,6 +1151,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; + struct bpos equiv; int ret = 0; peek: k = bch2_btree_iter_peek(iter); @@ -1112,7 +1168,10 @@ peek: goto out; } - ret = snapshots_seen_update(c, s, k.k->p); + equiv = k.k->p; + equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); + + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) goto err; @@ -1147,7 +1206,7 @@ peek: } } #endif - ret = __walk_inode(trans, inode, k.k->p); + ret = __walk_inode(trans, inode, equiv); if (ret < 0) goto err; @@ -1179,8 +1238,8 @@ peek: goto out; } - if (!bch2_snapshot_internal_node(c, k.k->p.snapshot)) { - for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) { + if (!bch2_snapshot_internal_node(c, equiv.snapshot)) { + for_each_visible_inode(c, s, inode, equiv.snapshot, i) { if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && k.k->type != KEY_TYPE_reservation && k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9, c, @@ -1189,7 +1248,7 @@ peek: bch2_fs_lazy_rw(c); ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, SPOS(k.k->p.inode, round_up(i->inode.bi_size, block_bytes(c)) >> 9, - k.k->p.snapshot), + equiv.snapshot), POS(k.k->p.inode, U64_MAX), 0, NULL) ?: -EINTR; goto out; @@ -1198,7 +1257,7 @@ peek: } if (bkey_extent_is_allocation(k.k)) - for_each_visible_inode(c, s, inode, k.k->p.snapshot, i) + for_each_visible_inode(c, s, inode, equiv.snapshot, i) i->count += k.k->size; #if 0 bch2_bkey_buf_reassemble(&prev, c, k); @@ -1433,6 +1492,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d; struct inode_walker_entry *i; struct printbuf buf = PRINTBUF; + struct bpos equiv; int ret = 0; peek: k = bch2_btree_iter_peek(iter); @@ -1449,7 +1509,10 @@ peek: goto out; } - ret = snapshots_seen_update(c, s, k.k->p); + equiv = k.k->p; + equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); + + ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); if (ret) goto err; @@ -1467,7 +1530,7 @@ peek: goto peek; } - ret = __walk_inode(trans, dir, k.k->p); + ret = __walk_inode(trans, dir, equiv); if (ret < 0) goto err; @@ -1567,7 +1630,8 @@ peek: goto err; if (fsck_err_on(!target->inodes.nr, c, - "dirent points to missing inode:\n%s", + "dirent points to missing inode: (equiv %u)\n%s", + equiv.snapshot, (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { @@ -1585,7 +1649,7 @@ peek: } if (d.v->d_type == DT_DIR) - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) + for_each_visible_inode(c, s, dir, equiv.snapshot, i) i->count++; out: @@ -1841,7 +1905,7 @@ static int check_path(struct btree_trans *trans, struct bch_fs *c = trans->c; int ret = 0; - snapshot = snapshot_t(c, snapshot)->equiv; + snapshot = bch2_snapshot_equiv(c, snapshot); p->nr = 0; while (!(inode->bi_inum == BCACHEFS_ROOT_INO && @@ -2126,7 +2190,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links BTREE_ITER_INTENT| BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, ret) { - ret = snapshots_seen_update(c, &s, k.k->p); + ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); if (ret) break; @@ -2138,7 +2202,7 @@ static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links d.v->d_type != DT_SUBVOL) inc_link(c, &s, links, range_start, range_end, le64_to_cpu(d.v->d_inum), - d.k->p.snapshot); + bch2_snapshot_equiv(c, d.k->p.snapshot)); break; } } diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index 7823040c9641..02a636644988 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -27,6 +27,16 @@ static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id) return snapshot_t(c, id)->parent; } +static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id) +{ + return snapshot_t(c, id)->equiv; +} + +static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id) +{ + return id == snapshot_t(c, id)->equiv; +} + static inline u32 bch2_snapshot_internal_node(struct bch_fs *c, u32 id) { struct snapshot_t *s = snapshot_t(c, id); @@ -58,31 +68,6 @@ static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ances return id == ancestor; } -struct snapshots_seen { - struct bpos pos; - DARRAY(u32) ids; -}; - -static inline void snapshots_seen_exit(struct snapshots_seen *s) -{ - kfree(s->ids.data); - s->ids.data = NULL; -} - -static inline void snapshots_seen_init(struct snapshots_seen *s) -{ - memset(s, 0, sizeof(*s)); -} - -static inline int snapshots_seen_add(struct bch_fs *c, struct snapshots_seen *s, u32 id) -{ - int ret = darray_push(&s->ids, id); - if (ret) - bch_err(c, "error reallocating snapshots_seen table (size %zu)", - s->ids.size); - return ret; -} - static inline bool snapshot_list_has_id(snapshot_id_list *s, u32 id) { u32 *i;