bcachefs fixes for 6.10-rc8

- Switch some asserts to WARN()
 - Fix a few "transaction not locked" asserts in the data read retry
   paths and backpointers gc
 - Fix a race that would cause the journal to get stuck on a flush commit
 - Add missing fsck checks for the fragmentation LRU
 - The usual assorted ssorted syzbot fixes
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmaOuRwACgkQE6szbY3K
 bnaCHhAAi9VRqws+zx3fSpe2OMwWqAEWA84QgIFJccy+I86d7dXkqG389gFqJwMG
 9S3BUHP1WooJmpsTRhK5cNtxZuKKOajXlxUYz3onsF7O/U3dHFY5GU7yIIjXS/0o
 q7+iryWAJ4MmlOrAJhgPMH/WlhbSVsjANUN0n/NhlOWHccFGHmpdMTb6aYzb+lfL
 iZOONKmEOR65gLzZYlO323OB2Tv00iEbOZAtxk68BLZYX+WON/j1T1A8gK4G0XSX
 8wcYpXNxGGkCufjBfAbXf4mcp/WygQq0Wj3bdVMFkZ+AwSJDcfGeK1H7f6tJ9e4n
 lqfWL4tgWIckS+41sA96B5cYry9TMDdhu3IeFaAm0ZrF55JT1JySGE1GNA+mo6xA
 mkMAqhG7rwYh6nSJfWX0Ie+zJ9TFbmi05ZbI7jaTuQjnJ5uvPpTuRfBDi+qSWmoi
 +IBDAi9hZgCUNEsLRGDm7RDQo0dpbFo6jpArn1RHK4MO/HkTrqcKpTqiGnfwFAU4
 PFxwq5G9+d38+M6YMX0tXdfQ+fdxroA6aIBJSsIpF18tPRBOBlQsM2GFP34uHbyk
 L6HOzed2QpM5ExBmViX79F+obuDQ/gzXQszYvDKL4QTFNbx43gPWRDrGm8EQen6y
 12EScamXbUWBSWnOqxscmeUsTdTKxLfw/F43JbE2fE7jSxc5tss=
 =VGT8
 -----END PGP SIGNATURE-----

Merge tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:

 - Switch some asserts to WARN()

 - Fix a few "transaction not locked" asserts in the data read retry
   paths and backpointers gc

 - Fix a race that would cause the journal to get stuck on a flush
   commit

 - Add missing fsck checks for the fragmentation LRU

 - The usual assorted ssorted syzbot fixes

* tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs: (22 commits)
  bcachefs: Add missing bch2_trans_begin()
  bcachefs: Fix missing error check in journal_entry_btree_keys_validate()
  bcachefs: Warn on attempting a move with no replicas
  bcachefs: bch2_data_update_to_text()
  bcachefs: Log mount failure error code
  bcachefs: Fix undefined behaviour in eytzinger1_first()
  bcachefs: Mark bch_inode_info as SLAB_ACCOUNT
  bcachefs: Fix bch2_inode_insert() race path for tmpfiles
  closures: fix closure_sync + closure debugging
  bcachefs: Fix journal getting stuck on a flush commit
  bcachefs: io clock: run timer fns under clock lock
  bcachefs: Repair fragmentation_lru in alloc_write_key()
  bcachefs: add check for missing fragmentation in check_alloc_to_lru_ref()
  bcachefs: bch2_btree_write_buffer_maybe_flush()
  bcachefs: Add missing printbuf_tabstops_reset() calls
  bcachefs: Fix loop restart in bch2_btree_transactions_read()
  bcachefs: Fix bch2_read_retry_nodecode()
  bcachefs: Don't use the new_fs() bucket alloc path on an initialized fs
  bcachefs: Fix shift greater than integer size
  bcachefs: Change bch2_fs_journal_stop() BUG_ON() to warning
  ...
This commit is contained in:
Linus Torvalds 2024-07-10 11:50:16 -07:00
commit f6963ab4b0
25 changed files with 266 additions and 142 deletions

View File

@ -3,6 +3,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_key_cache.h"
@ -1553,13 +1554,13 @@ err:
}
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
struct btree_iter *alloc_iter)
struct btree_iter *alloc_iter,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter lru_iter;
struct bch_alloc_v4 a_convert;
const struct bch_alloc_v4 *a;
struct bkey_s_c alloc_k, lru_k;
struct bkey_s_c alloc_k;
struct printbuf buf = PRINTBUF;
int ret;
@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = bch2_alloc_to_v4(alloc_k, &a_convert);
if (a->fragmentation_lru) {
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
a->fragmentation_lru,
alloc_k, last_flushed);
if (ret)
return ret;
}
if (a->data_type != BCH_DATA_cached)
return 0;
@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
a = &a_mut->v;
}
lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
lru_pos(alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
a->io_time[READ]), 0);
ret = bkey_err(lru_k);
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
alloc_k, last_flushed);
if (ret)
return ret;
if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
alloc_key_to_missing_lru_entry,
"missing lru entry\n"
" %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
ret = bch2_lru_set(trans,
alloc_k.k->p.inode,
bucket_to_u64(alloc_k.k->p),
a->io_time[READ]);
if (ret)
goto err;
}
goto err;
err:
fsck_err:
bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
{
struct bkey_buf last_flushed;
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
bch2_check_alloc_to_lru_ref(trans, &iter)));
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
bch2_bkey_buf_exit(&last_flushed, c);
bch_err_fn(c, ret);
return ret;
}

View File

@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 24);
percpu_down_read(&c->mark_lock);
@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
nr[c->open_buckets[i].data_type]++;
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 12);
printbuf_tabstop_push(out, 16);
printbuf_tabstop_push(out, 16);

View File

@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
return ret;
}
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
struct extents_to_bp_state {
struct bpos bucket_start;
struct bpos bucket_end;
@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
struct btree_iter other_extent_iter = {};
struct printbuf buf = PRINTBUF;
struct bkey_s_c bp_k;
struct bkey_buf tmp;
int ret = 0;
bch2_bkey_buf_init(&tmp);
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
if (!ca) {
prt_str(&buf, "extent for nonexistent device:bucket ");
@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
if (bp_k.k->type != KEY_TYPE_backpointer ||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
bch2_bkey_buf_reassemble(&tmp, c, orig_k);
if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
if (bp.level) {
bch2_trans_unlock(trans);
bch2_btree_interior_updates_flush(c);
}
ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
goto err;
bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
if (ret)
goto err;
goto check_existing_bp;
}
@ -589,7 +566,6 @@ err:
fsck_err:
bch2_trans_iter_exit(trans, &other_extent_iter);
bch2_trans_iter_exit(trans, &bp_iter);
bch2_bkey_buf_exit(&tmp, c);
bch2_dev_put(ca);
printbuf_exit(&buf);
return ret;
@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
!((1U << btree) & btree_interior_mask))
continue;
bch2_trans_begin(trans);
__for_each_btree_node(trans, iter, btree,
btree == start.btree ? start.pos : POS_MIN,
0, depth, BTREE_ITER_prefetch, b, ret) {
@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
struct bkey_s_c_backpointer bp,
struct bpos *last_flushed_pos)
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
if (ret)
return ret;
if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
*last_flushed_pos = bp.k->p;
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
-BCH_ERR_transaction_restart_write_buffer_flush;
goto out;
}
if (!k.k) {
ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
if (ret)
goto out;
if (fsck_err_on(!k.k, c,
backpointer_to_missing_ptr,
"backpointer for missing %s\n %s",
bp.v->level ? "btree node" : "extent",
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
goto out;
if (fsck_err(c, backpointer_to_missing_ptr,
"backpointer for missing %s\n %s",
bp.v->level ? "btree node" : "extent",
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
goto out;
}
}
out:
fsck_err:
@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
struct bbpos start,
struct bbpos end)
{
struct bpos last_flushed_pos = SPOS_MAX;
struct bkey_buf last_flushed;
return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
bch2_bkey_buf_init(&last_flushed);
bkey_init(&last_flushed.k->k);
int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
check_one_backpointer(trans, start, end,
bkey_s_c_to_backpointer(k),
&last_flushed_pos));
&last_flushed));
bch2_bkey_buf_exit(&last_flushed, trans->c);
return ret;
}
int bch2_check_backpointers_to_extents(struct bch_fs *c)

View File

@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
u64 packed_max = f->bits_per_field[i]
? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
unsigned packed_bits = min(64, f->bits_per_field[i]);
u64 packed_max = packed_bits
? ~((~0ULL << 1) << (packed_bits - 1))
: 0;
prt_printf(err, "field %u too large: %llu + %llu > %llu",

View File

@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
return bkey_gt(l, r) ? l : r;
}
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
return bpos_eq(l.k->p, r.k->p) &&
bkey_bytes(l.k) == bkey_bytes(r.k) &&
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}
void bch2_bpos_swab(struct bpos *);
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);

View File

@ -903,6 +903,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
percpu_up_read(&c->mark_lock);
gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
if (fsck_err_on(new.data_type != gc.data_type, c,
alloc_key_data_type_wrong,
"bucket %llu:%llu gen %u has wrong data_type"
@ -916,23 +918,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
#define copy_bucket_field(_errtype, _f) \
if (fsck_err_on(new._f != gc._f, c, _errtype, \
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
": got %u, should be %u", \
": got %llu, should be %llu", \
iter->pos.inode, iter->pos.offset, \
gc.gen, \
bch2_data_type_str(gc.data_type), \
new._f, gc._f)) \
(u64) new._f, (u64) gc._f)) \
new._f = gc._f; \
copy_bucket_field(alloc_key_gen_wrong,
gen);
copy_bucket_field(alloc_key_dirty_sectors_wrong,
dirty_sectors);
copy_bucket_field(alloc_key_cached_sectors_wrong,
cached_sectors);
copy_bucket_field(alloc_key_stripe_wrong,
stripe);
copy_bucket_field(alloc_key_stripe_redundancy_wrong,
stripe_redundancy);
copy_bucket_field(alloc_key_gen_wrong, gen);
copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
copy_bucket_field(alloc_key_stripe_wrong, stripe);
copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
#undef copy_bucket_field
if (!bch2_alloc_v4_cmp(*old, new))
@ -946,7 +944,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
a->v = new;
/*
* The trigger normally makes sure this is set, but we're not running
* The trigger normally makes sure these are set, but we're not running
* triggers:
*/
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])

View File

@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "error.h"
#include "extents.h"
#include "journal.h"
#include "journal_io.h"
#include "journal_reclaim.h"
@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
return ret;
}
/**
* In check and repair code, when checking references to write buffer btrees we
* need to issue a flush before we have a definitive error: this issues a flush
* if this is a key we haven't yet checked.
*/
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
struct bkey_s_c referring_k,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct bkey_buf tmp;
int ret = 0;
bch2_bkey_buf_init(&tmp);
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
bch2_bkey_buf_reassemble(&tmp, c, referring_k);
if (bkey_is_btree_ptr(referring_k.k)) {
bch2_trans_unlock(trans);
bch2_btree_interior_updates_flush(c);
}
ret = bch2_btree_write_buffer_flush_sync(trans);
if (ret)
goto err;
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
}
err:
bch2_bkey_buf_exit(&tmp, c);
return ret;
}
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);

View File

@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
struct bkey_buf;
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
struct journal_keys_to_wb {
struct btree_write_buffer_keys *wb;
size_t room;

View File

@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
{
struct io_timer *ret = NULL;
spin_lock(&clock->timer_lock);
if (clock->timers.used &&
time_after_eq(now, clock->timers.data[0]->expire))
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
spin_unlock(&clock->timer_lock);
return ret;
}
@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
struct io_timer *timer;
unsigned long now = atomic64_add_return(sectors, &clock->now);
spin_lock(&clock->timer_lock);
while ((timer = get_expired_timer(clock, now)))
timer->fn(timer);
spin_unlock(&clock->timer_lock);
}
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)

View File

@ -5,7 +5,9 @@
#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "compress.h"
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "extents.h"
@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
}
}
void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
printbuf_tabstop_push(out, 20);
prt_str(out, "rewrite ptrs:\t");
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
prt_newline(out);
prt_str(out, "kill ptrs:\t");
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
prt_newline(out);
prt_str(out, "target:\t");
bch2_target_to_text(out, c, data_opts->target);
prt_newline(out);
prt_str(out, "compression:\t");
bch2_compression_opt_to_text(out, background_compression(*io_opts));
prt_newline(out);
prt_str(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
{
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
prt_newline(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
if (!(durability_have + durability_removing))
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
if (!m->op.nr_replicas) {
struct printbuf buf = PRINTBUF;
bch2_data_update_to_text(&buf, m);
WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
printbuf_exit(&buf);
ret = -BCH_ERR_data_update_done;
goto done;
}
m->op.nr_replicas_required = m->op.nr_replicas;
if (reserve_sectors) {

View File

@ -17,6 +17,9 @@ struct data_update_opts {
unsigned write_flags;
};
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
struct bch_io_opts *, struct data_update_opts *);
struct data_update {
/* extent being updated: */
enum btree_id btree_id;
@ -27,6 +30,8 @@ struct data_update {
struct bch_write_op op;
};
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
int bch2_data_update_index_update(struct bch_write_op *);
void bch2_data_update_read_done(struct data_update *,

View File

@ -610,7 +610,7 @@ restart:
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
list_for_each_entry(trans, &c->btree_trans_list, list) {
if ((ulong) trans < i->iter)
if ((ulong) trans <= i->iter)
continue;
i->iter = (ulong) trans;
@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = {
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
{
struct btree_trans *trans;
pid_t iter = 0;
ulong iter = 0;
restart:
seqmutex_lock(&c->btree_trans_lock);
list_for_each_entry(trans, &c->btree_trans_list, list) {
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
if (!task || task->pid <= iter)
list_for_each_entry(trans, &c->btree_trans_list, list) {
if ((ulong) trans <= iter)
continue;
iter = task->pid;
iter = (ulong) trans;
if (!closure_get_not_zero(&trans->ref))
continue;

View File

@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
static inline unsigned eytzinger1_first(unsigned size)
{
return rounddown_pow_of_two(size);
return size ? rounddown_pow_of_two(size) : 0;
}
static inline unsigned eytzinger1_last(unsigned size)
@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
static inline unsigned eytzinger1_extra(unsigned size)
{
return (size + 1 - rounddown_pow_of_two(size)) << 1;
return size
? (size + 1 - rounddown_pow_of_two(size)) << 1
: 0;
}
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,

View File

@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
/*
* We don't want bch2_evict_inode() to delete the inode on disk,
* we just raced and had another inode in cache. Normally new
* inodes don't have nlink == 0 - except tmpfiles do...
*/
set_nlink(&inode->v, 1);
discard_new_inode(&inode->v);
inode = old;
} else {
@ -2026,6 +2032,8 @@ err_put_super:
__bch2_fs_stop(c);
deactivate_locked_super(sb);
err:
if (ret)
pr_err("error: %s", bch2_err_str(ret));
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
@ -2065,7 +2073,8 @@ int __init bch2_vfs_init(void)
{
int ret = -ENOMEM;
bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
SLAB_ACCOUNT);
if (!bch2_inode_cache)
goto err;

View File

@ -389,7 +389,6 @@ retry:
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(trans);
if (!bch2_bkey_matches_ptr(c, k,
rbio->pick.ptr,
@ -1004,6 +1003,9 @@ get_bio:
rbio->promote = promote;
INIT_WORK(&rbio->work, NULL);
if (flags & BCH_READ_NODECODE)
orig->pick = pick;
rbio->bio.bi_opf = orig->bio.bi_opf;
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
rbio->bio.bi_end_io = bch2_read_endio;

View File

@ -1095,7 +1095,7 @@ unlock:
return ret;
}
int bch2_dev_journal_alloc(struct bch_dev *ca)
int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
{
unsigned nr;
int ret;
@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
err:
bch_err_fn(ca, ret);
return ret;
@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
if (ca->journal.nr)
continue;
int ret = bch2_dev_journal_alloc(ca);
int ret = bch2_dev_journal_alloc(ca, true);
if (ret) {
percpu_ref_put(&ca->io_ref);
return ret;
@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
journal_quiesce(j);
cancel_delayed_work_sync(&j->write_work);
BUG_ON(!bch2_journal_error(j) &&
test_bit(JOURNAL_replay_done, &j->flags) &&
j->last_empty_seq != journal_cur_seq(j));
WARN(!bch2_journal_error(j) &&
test_bit(JOURNAL_replay_done, &j->flags) &&
j->last_empty_seq != journal_cur_seq(j),
"journal shutdown error: cur seq %llu but last empty seq %llu",
journal_cur_seq(j), j->last_empty_seq);
if (!bch2_journal_error(j))
clear_bit(JOURNAL_running, &j->flags);
@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
unsigned long now = jiffies;
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
if (!out->nr_tabstops)
printbuf_tabstop_push(out, 28);
printbuf_tabstops_reset(out);
printbuf_tabstop_push(out, 28);
out->atomic++;
rcu_read_lock();

View File

@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);
int bch2_dev_journal_alloc(struct bch_dev *);
int bch2_dev_journal_alloc(struct bch_dev *, bool);
int bch2_fs_journal_alloc(struct bch_fs *);
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);

View File

@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
flags|BCH_VALIDATE_journal);
if (ret == FSCK_DELETED_KEY)
continue;
else if (ret)
return ret;
k = bkey_next(k);
}
@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
spin_lock(&j->lock);
closure_wait(&j->async_wait, cl);
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
closure_wait(&j->async_wait, cl);
spin_unlock(&j->lock);
continue_at(cl, journal_write_preflush, j->wq);
return;
}
spin_unlock(&j->lock);
continue_at(cl, journal_write_preflush, j->wq);
return;
}
if (w->separate_flush) {

View File

@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
NULL
};
int bch2_lru_check_set(struct btree_trans *trans,
u16 lru_id, u64 time,
struct bkey_s_c referring_k,
struct bkey_buf *last_flushed)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
struct btree_iter lru_iter;
struct bkey_s_c lru_k =
bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
lru_pos(lru_id,
bucket_to_u64(referring_k.k->p),
time), 0);
int ret = bkey_err(lru_k);
if (ret)
return ret;
if (lru_k.k->type != KEY_TYPE_set) {
ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
if (ret)
goto err;
if (fsck_err(c, alloc_key_to_missing_lru_entry,
"missing %s lru entry\n"
" %s",
bch2_lru_types[lru_type(lru_k)],
(bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
if (ret)
goto err;
}
}
err:
fsck_err:
bch2_trans_iter_exit(trans, &lru_iter);
printbuf_exit(&buf);
return ret;
}
static int bch2_check_lru_key(struct btree_trans *trans,
struct btree_iter *lru_iter,
struct bkey_s_c lru_k,

View File

@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
struct bkey_buf;
int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
int bch2_check_lrus(struct bch_fs *);
#endif /* _BCACHEFS_LRU_H */

View File

@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
NULL
};
static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)
{
printbuf_tabstop_push(out, 20);
prt_str(out, "rewrite ptrs:\t");
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
prt_newline(out);
prt_str(out, "kill ptrs:\t");
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
prt_newline(out);
prt_str(out, "target:\t");
bch2_target_to_text(out, c, data_opts->target);
prt_newline(out);
prt_str(out, "compression:\t");
bch2_compression_opt_to_text(out, background_compression(*io_opts));
prt_newline(out);
prt_str(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
struct bch_io_opts *io_opts,
struct data_update_opts *data_opts)

View File

@ -286,7 +286,8 @@ enum bch_fsck_flags {
x(accounting_mismatch, 272, 0) \
x(accounting_replicas_not_marked, 273, 0) \
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0)
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

View File

@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock);
EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
free_percpu(c->online_reserved);
if (c->online_reserved) {
u64 v = percpu_u64_get(c->online_reserved);
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
free_percpu(c->online_reserved);
}
darray_exit(&c->btree_roots_extra);
free_percpu(c->pcpu);
@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
if (ret)
goto err;
ret = bch2_dev_journal_alloc(ca);
ret = bch2_dev_journal_alloc(ca, true);
bch_err_msg(c, ret, "allocating journal");
if (ret)
goto err;
@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
}
if (!ca->journal.nr) {
ret = bch2_dev_journal_alloc(ca);
ret = bch2_dev_journal_alloc(ca, false);
bch_err_msg(ca, ret, "allocating journal");
if (ret)
goto err;

View File

@ -159,6 +159,7 @@ struct closure {
#ifdef CONFIG_DEBUG_CLOSURES
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
#define CLOSURE_MAGIC_STACK 0xc05451cc
unsigned int magic;
struct list_head all;
@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
#ifdef CONFIG_DEBUG_CLOSURES
cl->magic = CLOSURE_MAGIC_STACK;
#endif
}
static inline void closure_init_stack_release(struct closure *cl)
{
memset(cl, 0, sizeof(struct closure));
atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
#ifdef CONFIG_DEBUG_CLOSURES
cl->magic = CLOSURE_MAGIC_STACK;
#endif
}
/**

View File

@ -244,6 +244,9 @@ void closure_debug_destroy(struct closure *cl)
{
unsigned long flags;
if (cl->magic == CLOSURE_MAGIC_STACK)
return;
BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
cl->magic = CLOSURE_MAGIC_DEAD;