mirror of https://github.com/torvalds/linux.git (synced 2024-11-24 21:21:41 +00:00)
bcachefs fixes for 6.12
Merge tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "This fixes one minor regression from the btree cache fixes (in the
  scan_for_btree_nodes repair path) - and the shutdown path fix is the
  big one here, in terms of bugs closed:

   - Assorted tiny syzbot fixes

   - Shutdown path fix: bch2_btree_write_buffer_flush_going_ro()

     The shutdown path wasn't flushing the btree write buffer, leading
     to shutting down while we still had operations in flight. This
     fixes a whole slew of syzbot bugs, and undoubtedly other strange
     heisenbugs"

* tag 'bcachefs-2024-11-13' of git://evilpiepirate.org/bcachefs:
  bcachefs: Fix assertion pop in bch2_ptr_swab()
  bcachefs: Fix journal_entry_dev_usage_to_text() overrun
  bcachefs: Allow for unknown key types in backpointers fsck
  bcachefs: Fix assertion pop in topology repair
  bcachefs: Fix hidden btree errors when reading roots
  bcachefs: Fix validate_bset() repair path
  bcachefs: Fix missing validation for bch_backpointer.level
  bcachefs: Fix bch_member.btree_bitmap_shift validation
  bcachefs: bch2_btree_write_buffer_flush_going_ro()
commit 4abcd80f23
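To illustrate the shutdown-path fix described above, here is a minimal, self-contained sketch (hypothetical names and simplified logic, not the bcachefs implementation): the read-only path keeps making flush passes and only stops once a full pass reports that no write-buffer work was done.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for keys still sitting in the btree write buffer. */
static int pending_wb_keys = 3;

/* Toy "flush while going read-only" helper: returns true if it did work. */
static bool write_buffer_flush_going_ro(void)
{
	bool did_work = pending_wb_keys > 0;

	pending_wb_keys = 0;	/* flushing empties the buffer */
	return did_work;
}

int main(void)
{
	unsigned clean_passes = 0;

	/* Require two consecutive passes with no work before shutting down
	 * (the threshold here is illustrative). */
	while (clean_passes < 2) {
		if (write_buffer_flush_going_ro())
			clean_passes = 0;	/* work happened: start counting again */
		else
			clean_passes++;
	}

	printf("write buffer empty, safe to go read-only\n");
	return 0;
}

The key point, visible in the super.c hunk below, is that the new flush helper reports whether it did anything, so the shutdown loop restarts its clean-pass count instead of going read-only with operations still in flight.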
@@ -52,6 +52,12 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
 			      enum bch_validate_flags flags)
 {
 	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+	int ret = 0;
+
+	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
+			 c, backpointer_level_bad,
+			 "backpointer level bad: %u >= %u",
+			 bp.v->level, BTREE_MAX_DEPTH);
 
 	rcu_read_lock();
 	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
@@ -64,7 +70,6 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
 	struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
 	struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
 	rcu_read_unlock();
-	int ret = 0;
 
 	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
 			 !bpos_eq(bp.k->p, bp_pos),
@@ -947,9 +952,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
 static int check_one_backpointer(struct btree_trans *trans,
 				 struct bbpos start,
 				 struct bbpos end,
-				 struct bkey_s_c_backpointer bp,
+				 struct bkey_s_c bp_k,
 				 struct bkey_buf *last_flushed)
 {
+	if (bp_k.k->type != KEY_TYPE_backpointer)
+		return 0;
+
+	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bbpos pos = bp_to_bbpos(*bp.v);
@@ -1004,9 +1013,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
 			POS_MIN, BTREE_ITER_prefetch, k,
 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
 		progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
-		check_one_backpointer(trans, start, end,
-				      bkey_s_c_to_backpointer(k),
-				      &last_flushed);
+		check_one_backpointer(trans, start, end, k, &last_flushed);
 	}));
 
 	bch2_bkey_buf_exit(&last_flushed, c);
@@ -182,7 +182,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
 	bch2_btree_node_drop_keys_outside_node(b);
 
 	mutex_lock(&c->btree_cache.lock);
-	bch2_btree_node_hash_remove(&c->btree_cache, b);
+	__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
 	bkey_copy(&b->key, &new->k_i);
 	ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
@@ -733,11 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 			 c, ca, b, i, NULL,
 			 bset_past_end_of_btree_node,
 			 "bset past end of btree node (offset %u len %u but written %zu)",
-			 offset, sectors, ptr_written ?: btree_sectors(c))) {
+			 offset, sectors, ptr_written ?: btree_sectors(c)))
 		i->u64s = 0;
-		ret = 0;
-		goto out;
-	}
 
 	btree_err_on(offset && !i->u64s,
 		     -BCH_ERR_btree_node_read_err_fixable,
@@ -829,7 +826,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
 					 BSET_BIG_ENDIAN(i), write,
 					 &bn->format);
 	}
-out:
 fsck_err:
 	printbuf_exit(&buf2);
 	printbuf_exit(&buf1);
@@ -2398,7 +2398,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
 	if (new_hash) {
 		mutex_lock(&c->btree_cache.lock);
 		bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
-		bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+		__bch2_btree_node_hash_remove(&c->btree_cache, b);
 
 		bkey_copy(&b->key, new_key);
 		ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
@@ -277,6 +277,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 	bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
 	int ret = 0;
 
+	ret = bch2_journal_error(&c->journal);
+	if (ret)
+		return ret;
+
 	bch2_trans_unlock(trans);
 	bch2_trans_begin(trans);
 
@@ -491,7 +495,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
 	return ret;
 }
 
-static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq,
+					bool *did_work)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_write_buffer *wb = &c->btree_write_buffer;
@@ -502,6 +507,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
 
 		fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
 
+		*did_work |= wb->inc.keys.nr || wb->flushing.keys.nr;
+
 		/*
 		 * On memory allocation failure, bch2_btree_write_buffer_flush_locked()
 		 * is not guaranteed to empty wb->inc:
@@ -521,17 +528,34 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
 				struct journal_entry_pin *_pin, u64 seq)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	bool did_work = false;
 
-	return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq));
+	return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work));
 }
 
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
+	bool did_work = false;
 
 	trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
 
-	return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal));
+	return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work);
 }
 
+/*
+ * The write buffer requires flushing when going RO: keys in the journal for the
+ * write buffer don't have a journal pin yet
+ */
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
+{
+	if (bch2_journal_error(&c->journal))
+		return false;
+
+	bool did_work = false;
+	bch2_trans_run(c, btree_write_buffer_flush_seq(trans,
+				journal_cur_seq(&c->journal), &did_work));
+	return did_work;
+}
+
 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
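The hunk above routes every flush through one helper that now takes a did_work out-parameter. A tiny stand-alone sketch (hypothetical names, simplified types; not the bcachefs code) of that design: the error return stays an int for callers that only care about failure, while the going-RO caller reads the flag to learn whether anything was actually flushed.

#include <stdbool.h>
#include <stdio.h>

static int queued_keys = 2;	/* stand-in for keys queued in the write buffer */

/* Toy flush: returns an error code, reports via *did_work whether keys moved. */
static int flush_seq(unsigned long long seq, bool *did_work)
{
	(void)seq;
	*did_work |= queued_keys > 0;
	queued_keys = 0;
	return 0;
}

/* Caller that only cares about the error code ignores did_work. */
static int flush_sync(unsigned long long seq)
{
	bool did_work = false;

	return flush_seq(seq, &did_work);
}

/* The going-read-only caller only cares whether anything was flushed. */
static bool flush_going_ro(unsigned long long seq)
{
	bool did_work = false;

	flush_seq(seq, &did_work);
	return did_work;
}

int main(void)
{
	printf("sync: err=%d\n", flush_sync(1));
	printf("going_ro: did_work=%d\n", flush_going_ro(2));
	return 0;
}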
@@ -21,6 +21,7 @@ static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
 
 struct btree_trans;
 int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *);
 int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
 int bch2_btree_write_buffer_tryflush(struct btree_trans *);
 
@@ -1364,7 +1364,7 @@ void bch2_ptr_swab(struct bkey_s k)
 	for (entry = ptrs.start;
 	     entry < ptrs.end;
 	     entry = extent_entry_next(entry)) {
-		switch (extent_entry_type(entry)) {
+		switch (__extent_entry_type(entry)) {
 		case BCH_EXTENT_ENTRY_ptr:
 			break;
 		case BCH_EXTENT_ENTRY_crc32:
@@ -1384,6 +1384,9 @@ void bch2_ptr_swab(struct bkey_s k)
 			break;
 		case BCH_EXTENT_ENTRY_rebalance:
 			break;
+		default:
+			/* Bad entry type: will be caught by validate() */
+			return;
 		}
 	}
 }
@@ -708,6 +708,9 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
 		container_of(entry, struct jset_entry_dev_usage, entry);
 	unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
 
+	if (vstruct_bytes(entry) < sizeof(*u))
+		return;
+
 	prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
 
 	printbuf_indent_add(out, 2);
@@ -27,6 +27,12 @@ const char * const bch2_recovery_passes[] = {
 	NULL
 };
 
+/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
+static int bch2_recovery_pass_empty(struct bch_fs *c)
+{
+	return 0;
+}
+
 static int bch2_set_may_go_rw(struct bch_fs *c)
 {
 	struct journal_keys *keys = &c->journal_keys;
@@ -13,6 +13,7 @@
  * must never change:
  */
 #define BCH_RECOVERY_PASSES()						\
+	x(recovery_pass_empty,			41, PASS_SILENT)	\
 	x(scan_for_btree_nodes,			37, 0)			\
 	x(check_topology,			 4, 0)			\
 	x(accounting_read,			39, PASS_ALWAYS)	\
@@ -136,7 +136,9 @@ enum bch_fsck_flags {
 	x(bucket_gens_nonzero_for_invalid_buckets,		122,	FSCK_AUTOFIX)	\
 	x(need_discard_freespace_key_to_invalid_dev_bucket,	123,	0)		\
 	x(need_discard_freespace_key_bad,			124,	0)		\
+	x(discarding_bucket_not_in_need_discard_btree,		291,	0)		\
 	x(backpointer_bucket_offset_wrong,			125,	0)		\
+	x(backpointer_level_bad,				294,	0)		\
 	x(backpointer_to_missing_device,			126,	0)		\
 	x(backpointer_to_missing_alloc,				127,	0)		\
 	x(backpointer_to_missing_ptr,				128,	0)		\
@@ -177,7 +179,9 @@ enum bch_fsck_flags {
 	x(ptr_stripe_redundant,					163,	0)		\
 	x(reservation_key_nr_replicas_invalid,			164,	0)		\
 	x(reflink_v_refcount_wrong,				165,	0)		\
+	x(reflink_v_pos_bad,					292,	0)		\
 	x(reflink_p_to_missing_reflink_v,			166,	0)		\
+	x(reflink_refcount_underflow,				293,	0)		\
 	x(stripe_pos_bad,					167,	0)		\
 	x(stripe_val_size_bad,					168,	0)		\
 	x(stripe_csum_granularity_bad,				290,	0)		\
@@ -302,7 +306,7 @@ enum bch_fsck_flags {
 	x(accounting_key_replicas_devs_unsorted,		280,	FSCK_AUTOFIX)	\
 	x(accounting_key_version_0,				282,	FSCK_AUTOFIX)	\
 	x(logged_op_but_clean,					283,	FSCK_AUTOFIX)	\
-	x(MAX,							291,	0)
+	x(MAX,							295,	0)
 
 enum bch_sb_error_id {
 #define x(t, n, ...) BCH_FSCK_ERR_##t = n,
@@ -163,7 +163,7 @@ static int validate_member(struct printbuf *err,
 		return -BCH_ERR_invalid_sb_members;
 	}
 
-	if (m.btree_bitmap_shift >= 64) {
+	if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) {
 		prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift);
 		return -BCH_ERR_invalid_sb_members;
 	}
@@ -450,7 +450,7 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
 		m->btree_bitmap_shift += resize;
 	}
 
-	BUG_ON(m->btree_bitmap_shift > 57);
+	BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
 	BUG_ON(end > 64ULL << m->btree_bitmap_shift);
 
 	for (unsigned bit = start >> m->btree_bitmap_shift;
@@ -65,6 +65,12 @@ struct bch_member {
 	__le32			last_journal_bucket_offset;
 };
 
+/*
+ * btree_allocated_bitmap can represent sector addresses of a u64: it itself has
+ * 64 elements, so 64 - ilog2(64)
+ */
+#define BCH_MI_BTREE_BITMAP_SHIFT_MAX	58
+
 /*
  * This limit comes from the bucket_gens array - it's a single allocation, and
  * kernel allocation are limited to INT_MAX
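The new define's comment compresses an arithmetic argument. A quick stand-alone check (not kernel code) spells it out: a 64-bit bitmap where each bit covers 1 << shift sectors spans at most 64 << shift sectors, so for that product to stay inside a 64-bit sector address the shift must stay below 64 - ilog2(64) = 58.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const unsigned bitmap_bits = 64;	/* bits in the btree allocation bitmap */
	const unsigned log2_bits = 6;		/* ilog2(64) */

	/* Exclusive upper bound on btree_bitmap_shift, matching the new define. */
	assert(bitmap_bits - log2_bits == 58);

	/* The largest allowed shift (57) still keeps 64 << shift inside a u64. */
	assert((UINT64_C(64) << 57) == UINT64_C(1) << 63);

	return 0;
}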
@@ -272,6 +272,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 		clean_passes++;
 
 		if (bch2_btree_interior_updates_flush(c) ||
+		    bch2_btree_write_buffer_flush_going_ro(c) ||
 		    bch2_journal_flush_all_pins(&c->journal) ||
 		    bch2_btree_flush_all_writes(c) ||
 		    seq != atomic64_read(&c->journal.seq)) {