bcachefs fixes for 6.12

- Assorted tiny syzbot fixes
 - Shutdown path fix: "bch2_btree_write_buffer_flush_going_ro()"
 
   The shutdown path wasn't flushing the btree write buffer, leading to
   shutting down while we still had operations in flight. This fixes a
   whole slew of syzbot bugs, and undoubtedly other strange heisenbugs.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmc1ceQACgkQE6szbY3K
 bna56w//V4On/z8q8r4vyj44NXfFNXL87QNB7e5BAP8lmow8S1KnqpHlpUq+MaJm
 /LI1OPPaDJITokrQ4eUKyfRLQaH9PlDk7WafgzixnYmaXKVIraqGzWfISSbNhLdC
 Hw/EjYdqmuQMAxTgj1upyx2UijvBRaT0dvjeaTX2OgQiqIlBUWGdUDFHWzc1sRgO
 8PrJf9CYscdS+P+XrDBKLLCiKGn73Z93lSsFTwVzDictfrqUT8X5J5/a+ofCwp8W
 sEiL/13bp05ABb3YjVrBupApyY6szC6YrKNwJ8RoHxBGo0zl5L/gd9FnBJX0T1wc
 10dDMv4gZWf9mF2KePDH5EYBBV62N8qEwRAHrjkGu/87mNBRf6sUZ3fbTiMMmVds
 nSTJUHH6t6uiwtezPPzDKtPYQBcGsP95DBdvL4un97jTlUtszHLU8OqVErojTYzZ
 sZLilUUMz+CK/qfoJjUcSJwslevHeiri5Hto4jOvf8M7uH0Xx29LvmwqWLdZP2uT
 RNjslFLvqzZoWGsbJEfe2YihJwJd9kkiE/0xQK60169bzoNEI6oWNmOK2Ts1Dr0X
 tEkdHmWqNI65+nbjomGOa3u1wysrACuaRwu6fq0562+IvlDtVXLUWbmlS58+bqz2
 KwUmZp4Y2OibeKFT+y0iqxpw9WBPQb7dwj9xwpwk3xgph1uVwbY=
 =L6kv
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "This fixes one minor regression from the btree cache fixes (in the
  scan_for_btree_nodes repair path) - and the shutdown path fix is the
  big one here, in terms of bugs closed:

   - Assorted tiny syzbot fixes

   - Shutdown path fix: "bch2_btree_write_buffer_flush_going_ro()"

     The shutdown path wasn't flushing the btree write buffer, leading
     to shutting down while we still had operations in flight. This
     fixes a whole slew of syzbot bugs, and undoubtedly other strange
     heisenbugs.

* tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs:
  bcachefs: Fix assertion pop in bch2_ptr_swab()
  bcachefs: Fix journal_entry_dev_usage_to_text() overrun
  bcachefs: Allow for unknown key types in backpointers fsck
  bcachefs: Fix assertion pop in topology repair
  bcachefs: Fix hidden btree errors when reading roots
  bcachefs: Fix validate_bset() repair path
  bcachefs: Fix missing validation for bch_backpointer.level
  bcachefs: Fix bch_member.btree_bitmap_shift validation
  bcachefs: bch2_btree_write_buffer_flush_going_ro()
This commit is contained in:
Linus Torvalds 2024-11-14 10:00:23 -08:00
commit 4abcd80f23
14 changed files with 72 additions and 19 deletions

View File

@@ -52,6 +52,12 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
 			      enum bch_validate_flags flags)
 {
 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+	int ret = 0;
+
+	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
+			 c, backpointer_level_bad,
+			 "backpointer level bad: %u >= %u",
+			 bp.v->level, BTREE_MAX_DEPTH);
 
 	rcu_read_lock();
 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
@@ -64,7 +70,6 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
 	struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
 	struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
 	rcu_read_unlock();
-	int ret = 0;
 
 	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
 			 !bpos_eq(bp.k->p, bp_pos),
@@ -947,9 +952,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 static int check_one_backpointer(struct btree_trans *trans,
 				 struct bbpos start,
 				 struct bbpos end,
-				 struct bkey_s_c_backpointer bp,
+				 struct bkey_s_c bp_k,
 				 struct bkey_buf *last_flushed)
 {
+	if (bp_k.k->type != KEY_TYPE_backpointer)
+		return 0;
+
+	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bbpos pos = bp_to_bbpos(*bp.v);
@@ -1004,9 +1013,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
 				  POS_MIN, BTREE_ITER_prefetch, k,
 				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
 			progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
-			check_one_backpointer(trans, start, end,
-					      bkey_s_c_to_backpointer(k),
-					      &last_flushed);
+			check_one_backpointer(trans, start, end, k, &last_flushed);
 		}));
 
 	bch2_bkey_buf_exit(&last_flushed, c);

View File

@@ -182,7 +182,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
 	bch2_btree_node_drop_keys_outside_node(b);
 
 	mutex_lock(&c->btree_cache.lock);
-	bch2_btree_node_hash_remove(&c->btree_cache, b);
+	__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
 	bkey_copy(&b->key, &new->k_i);
 	ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);

View File

@@ -733,11 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 				 c, ca, b, i, NULL,
 				 bset_past_end_of_btree_node,
 				 "bset past end of btree node (offset %u len %u but written %zu)",
-				 offset, sectors, ptr_written ?: btree_sectors(c))) {
+				 offset, sectors, ptr_written ?: btree_sectors(c)))
 		i->u64s = 0;
-		ret = 0;
-		goto out;
-	}
 
 	btree_err_on(offset && !i->u64s,
 		     -BCH_ERR_btree_node_read_err_fixable,
@@ -829,7 +826,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 					 BSET_BIG_ENDIAN(i), write,
 					 &bn->format);
 	}
-out:
 fsck_err:
 	printbuf_exit(&buf2);
 	printbuf_exit(&buf1);

View File

@@ -2398,7 +2398,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 	if (new_hash) {
 		mutex_lock(&c->btree_cache.lock);
 		bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
-		bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+		__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
 		bkey_copy(&b->key, new_key);
 		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);

View File

@@ -277,6 +277,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 	bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
 	int ret = 0;
 
+	ret = bch2_journal_error(&c->journal);
+	if (ret)
+		return ret;
+
 	bch2_trans_unlock(trans);
 	bch2_trans_begin(trans);
@@ -491,7 +495,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
 	return ret;
 }
 
-static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq,
+					bool *did_work)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
@@ -502,6 +507,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq,
 
 		fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
 
+		*did_work |= wb->inc.keys.nr || wb->flushing.keys.nr;
+
 		/*
 		 * On memory allocation failure, bch2_btree_write_buffer_flush_locked()
 		 * is not guaranteed to empty wb->inc:
@@ -521,17 +528,34 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
 				struct journal_entry_pin *_pin, u64 seq)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	bool did_work = false;
 
-	return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq));
+	return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work));
 }
 
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
+	bool did_work = false;
 
 	trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
-	return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal));
+	return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work);
+}
+
+/*
+ * The write buffer requires flushing when going RO: keys in the journal for the
+ * write buffer don't have a journal pin yet
+ */
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
+{
+	if (bch2_journal_error(&c->journal))
+		return false;
+
+	bool did_work = false;
+	bch2_trans_run(c, btree_write_buffer_flush_seq(trans,
+					journal_cur_seq(&c->journal), &did_work));
+	return did_work;
 }
 
 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)

View File

@@ -21,6 +21,7 @@ static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
 struct btree_trans;
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *);
 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
 int bch2_btree_write_buffer_tryflush(struct btree_trans *);

View File

@@ -1364,7 +1364,7 @@ void bch2_ptr_swab(struct bkey_s k)
 	for (entry = ptrs.start;
 	     entry < ptrs.end;
 	     entry = extent_entry_next(entry)) {
-		switch (extent_entry_type(entry)) {
+		switch (__extent_entry_type(entry)) {
 		case BCH_EXTENT_ENTRY_ptr:
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
@@ -1384,6 +1384,9 @@ void bch2_ptr_swab(struct bkey_s k)
 			break;
 		case BCH_EXTENT_ENTRY_rebalance:
 			break;
+		default:
+			/* Bad entry type: will be caught by validate() */
+			return;
 		}
 	}
 }

View File

@@ -708,6 +708,9 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
 		container_of(entry, struct jset_entry_dev_usage, entry);
 	unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
 
+	if (vstruct_bytes(entry) < sizeof(*u))
+		return;
+
 	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
 
 	printbuf_indent_add(out, 2);

View File

@@ -27,6 +27,12 @@ const char * const bch2_recovery_passes[] = {
 	NULL
 };
 
+/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
+static int bch2_recovery_pass_empty(struct bch_fs *c)
+{
+	return 0;
+}
+
 static int bch2_set_may_go_rw(struct bch_fs *c)
 {
 	struct journal_keys *keys = &c->journal_keys;

View File

@@ -13,6 +13,7 @@
  * must never change:
  */
 #define BCH_RECOVERY_PASSES()							\
+	x(recovery_pass_empty,			41, PASS_SILENT)		\
 	x(scan_for_btree_nodes,			37, 0)				\
 	x(check_topology,			 4, 0)				\
 	x(accounting_read,			39, PASS_ALWAYS)		\

View File

@@ -136,7 +136,9 @@ enum bch_fsck_flags {
 	x(bucket_gens_nonzero_for_invalid_buckets,		122,	FSCK_AUTOFIX)	\
 	x(need_discard_freespace_key_to_invalid_dev_bucket,	123,	0)		\
 	x(need_discard_freespace_key_bad,			124,	0)		\
+	x(discarding_bucket_not_in_need_discard_btree,		291,	0)		\
 	x(backpointer_bucket_offset_wrong,			125,	0)		\
+	x(backpointer_level_bad,				294,	0)		\
 	x(backpointer_to_missing_device,			126,	0)		\
 	x(backpointer_to_missing_alloc,				127,	0)		\
 	x(backpointer_to_missing_ptr,				128,	0)		\
@@ -177,7 +179,9 @@ enum bch_fsck_flags {
 	x(ptr_stripe_redundant,					163,	0)		\
 	x(reservation_key_nr_replicas_invalid,			164,	0)		\
 	x(reflink_v_refcount_wrong,				165,	0)		\
+	x(reflink_v_pos_bad,					292,	0)		\
 	x(reflink_p_to_missing_reflink_v,			166,	0)		\
+	x(reflink_refcount_underflow,				293,	0)		\
 	x(stripe_pos_bad,					167,	0)		\
 	x(stripe_val_size_bad,					168,	0)		\
 	x(stripe_csum_granularity_bad,				290,	0)		\
@@ -302,7 +306,7 @@ enum bch_fsck_flags {
 	x(accounting_key_replicas_devs_unsorted,		280,	FSCK_AUTOFIX)	\
 	x(accounting_key_version_0,				282,	FSCK_AUTOFIX)	\
 	x(logged_op_but_clean,					283,	FSCK_AUTOFIX)	\
-	x(MAX,							291,	0)
+	x(MAX,							295,	0)
 
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@@ -163,7 +163,7 @@ static int validate_member(struct printbuf *err,
 		return -BCH_ERR_invalid_sb_members;
 	}
 
-	if (m.btree_bitmap_shift >= 64) {
+	if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) {
 		prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift);
 		return -BCH_ERR_invalid_sb_members;
 	}
@@ -450,7 +450,7 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
 		m->btree_bitmap_shift += resize;
 	}
 
-	BUG_ON(m->btree_bitmap_shift > 57);
+	BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
 	BUG_ON(end > 64ULL << m->btree_bitmap_shift);
 
 	for (unsigned bit = start >> m->btree_bitmap_shift;

View File

@@ -65,6 +65,12 @@ struct bch_member {
 	__le32			last_journal_bucket_offset;
 };
 
+/*
+ * btree_allocated_bitmap can represent sector addresses of a u64: it itself has
+ * 64 elements, so 64 - ilog2(64)
+ */
+#define BCH_MI_BTREE_BITMAP_SHIFT_MAX	58
+
 /*
  * This limit comes from the bucket_gens array - it's a single allocation, and
  * kernel allocation are limited to INT_MAX

View File

@@ -272,6 +272,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 		clean_passes++;
 
 		if (bch2_btree_interior_updates_flush(c) ||
+		    bch2_btree_write_buffer_flush_going_ro(c) ||
 		    bch2_journal_flush_all_pins(&c->journal) ||
 		    bch2_btree_flush_all_writes(c) ||
 		    seq != atomic64_read(&c->journal.seq)) {