From 6ec8623f7cdd7e32a560fbef1e742bd8bc23925e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 12 Jul 2024 14:16:01 -0400 Subject: [PATCH] bcachefs: btree node scan: fall back to comparing by journal seq Highly damaged filesystems, or filesystems that have been damaged, repaired, and damaged again, may have sequence numbers we can't fully trust - which in itself is something we need to debug. Add a journal_seq fallback so that repair doesn't get stuck. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_io.c | 4 +++ fs/bcachefs/btree_node_scan.c | 51 ++++++++++++++++++++--------- fs/bcachefs/btree_node_scan_types.h | 1 + 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index e092f541c449..db700caf8afa 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1006,6 +1006,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, BTREE_PTR_RANGE_UPDATED(&bkey_i_to_btree_ptr_v2(&b->key)->v); unsigned u64s; unsigned ptr_written = btree_ptr_sectors_written(bkey_i_to_s_c(&b->key)); + u64 max_journal_seq = 0; struct printbuf buf = PRINTBUF; int ret = 0, retry_read = 0, write = READ; u64 start_time = local_clock(); @@ -1181,6 +1182,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sort_iter_add(iter, vstruct_idx(i, 0), vstruct_last(i)); + + max_journal_seq = max(max_journal_seq, le64_to_cpu(i->journal_seq)); } if (ptr_written) { @@ -1217,6 +1220,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, swap(sorted, b->data); set_btree_bset(b, b->set, &b->data->keys); b->nsets = 1; + b->data->keys.journal_seq = cpu_to_le64(max_journal_seq); BUG_ON(b->nr.live_u64s != u64s); diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 2cb0442f6cc9..001107226377 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -22,7 +22,9 @@ struct find_btree_nodes_worker { static void 
found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n) { - prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie); + prt_printf(out, "%s l=%u seq=%u journal_seq=%llu cookie=%llx ", + bch2_btree_id_str(n->btree_id), n->level, n->seq, + n->journal_seq, n->cookie); bch2_bpos_to_text(out, n->min_key); prt_str(out, "-"); bch2_bpos_to_text(out, n->max_key); @@ -63,19 +65,37 @@ static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_n memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs); } +static inline u64 bkey_journal_seq(struct bkey_s_c k) +{ + switch (k.k->type) { + case KEY_TYPE_inode_v3: + return le64_to_cpu(bkey_s_c_to_inode_v3(k).v->bi_journal_seq); + default: + return 0; + } +} + static bool found_btree_node_is_readable(struct btree_trans *trans, struct found_btree_node *f) { - struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k; + struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp; - found_btree_node_to_key(&k.k, f); + found_btree_node_to_key(&tmp.k, f); - struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false); + struct btree *b = bch2_btree_node_get_noiter(trans, &tmp.k, f->btree_id, f->level, false); bool ret = !IS_ERR_OR_NULL(b); if (!ret) return ret; f->sectors_written = b->written; + f->journal_seq = le64_to_cpu(b->data->keys.journal_seq); + + struct bkey_s_c k; + struct bkey unpacked; + struct btree_node_iter iter; + for_each_btree_node_key_unpack(b, k, &iter, &unpacked) + f->journal_seq = max(f->journal_seq, bkey_journal_seq(k)); + six_unlock_read(&b->c.lock); /* @@ -84,7 +104,7 @@ static bool found_btree_node_is_readable(struct btree_trans *trans, * this node */ if (b != btree_node_root(trans->c, b)) - bch2_btree_node_evict(trans, &k.k); + bch2_btree_node_evict(trans, &tmp.k); return ret; } @@ -105,7 +125,8 @@ static int found_btree_node_cmp_cookie(const void *_l, 
const void *_r) static int found_btree_node_cmp_time(const struct found_btree_node *l, const struct found_btree_node *r) { - return cmp_int(l->seq, r->seq); + return cmp_int(l->seq, r->seq) ?: + cmp_int(l->journal_seq, r->journal_seq); } static int found_btree_node_cmp_pos(const void *_l, const void *_r) @@ -309,15 +330,15 @@ again: } else if (n->level) { n->overwritten = true; } else { - struct printbuf buf = PRINTBUF; - - prt_str(&buf, "overlapping btree nodes with same seq! halting\n "); - found_btree_node_to_text(&buf, c, start); - prt_str(&buf, "\n "); - found_btree_node_to_text(&buf, c, n); - bch_err(c, "%s", buf.buf); - printbuf_exit(&buf); - return -BCH_ERR_fsck_repair_unimplemented; + if (bpos_cmp(start->max_key, n->max_key) >= 0) + n->overwritten = true; + else { + n->range_updated = true; + n->min_key = bpos_successor(start->max_key); + n->range_updated = true; + bubble_up(n, end); + goto again; + } } } diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h index 5cfaeb5ac831..b6c36c45d0be 100644 --- a/fs/bcachefs/btree_node_scan_types.h +++ b/fs/bcachefs/btree_node_scan_types.h @@ -11,6 +11,7 @@ struct found_btree_node { u8 level; unsigned sectors_written; u32 seq; + u64 journal_seq; u64 cookie; struct bpos min_key;