mirror of
https://github.com/torvalds/linux.git
synced 2024-11-25 13:41:51 +00:00
bcachefs fixes for 6.10-rc8
- Switch some asserts to WARN() - Fix a few "transaction not locked" asserts in the data read retry paths and backpointers gc - Fix a race that would cause the journal to get stuck on a flush commit - Add missing fsck checks for the fragmentation LRU - The usual assorted ssorted syzbot fixes -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEKnAFLkS8Qha+jvQrE6szbY3KbnYFAmaOuRwACgkQE6szbY3K bnaCHhAAi9VRqws+zx3fSpe2OMwWqAEWA84QgIFJccy+I86d7dXkqG389gFqJwMG 9S3BUHP1WooJmpsTRhK5cNtxZuKKOajXlxUYz3onsF7O/U3dHFY5GU7yIIjXS/0o q7+iryWAJ4MmlOrAJhgPMH/WlhbSVsjANUN0n/NhlOWHccFGHmpdMTb6aYzb+lfL iZOONKmEOR65gLzZYlO323OB2Tv00iEbOZAtxk68BLZYX+WON/j1T1A8gK4G0XSX 8wcYpXNxGGkCufjBfAbXf4mcp/WygQq0Wj3bdVMFkZ+AwSJDcfGeK1H7f6tJ9e4n lqfWL4tgWIckS+41sA96B5cYry9TMDdhu3IeFaAm0ZrF55JT1JySGE1GNA+mo6xA mkMAqhG7rwYh6nSJfWX0Ie+zJ9TFbmi05ZbI7jaTuQjnJ5uvPpTuRfBDi+qSWmoi +IBDAi9hZgCUNEsLRGDm7RDQo0dpbFo6jpArn1RHK4MO/HkTrqcKpTqiGnfwFAU4 PFxwq5G9+d38+M6YMX0tXdfQ+fdxroA6aIBJSsIpF18tPRBOBlQsM2GFP34uHbyk L6HOzed2QpM5ExBmViX79F+obuDQ/gzXQszYvDKL4QTFNbx43gPWRDrGm8EQen6y 12EScamXbUWBSWnOqxscmeUsTdTKxLfw/F43JbE2fE7jSxc5tss= =VGT8 -----END PGP SIGNATURE----- Merge tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs Pull bcachefs fixes from Kent Overstreet: - Switch some asserts to WARN() - Fix a few "transaction not locked" asserts in the data read retry paths and backpointers gc - Fix a race that would cause the journal to get stuck on a flush commit - Add missing fsck checks for the fragmentation LRU - The usual assorted ssorted syzbot fixes * tag 'bcachefs-2024-07-10' of https://evilpiepirate.org/git/bcachefs: (22 commits) bcachefs: Add missing bch2_trans_begin() bcachefs: Fix missing error check in journal_entry_btree_keys_validate() bcachefs: Warn on attempting a move with no replicas bcachefs: bch2_data_update_to_text() bcachefs: Log mount failure error code bcachefs: Fix undefined behaviour in eytzinger1_first() bcachefs: Mark bch_inode_info as SLAB_ACCOUNT bcachefs: Fix bch2_inode_insert() 
race path for tmpfiles closures: fix closure_sync + closure debugging bcachefs: Fix journal getting stuck on a flush commit bcachefs: io clock: run timer fns under clock lock bcachefs: Repair fragmentation_lru in alloc_write_key() bcachefs: add check for missing fragmentation in check_alloc_to_lru_ref() bcachefs: bch2_btree_write_buffer_maybe_flush() bcachefs: Add missing printbuf_tabstops_reset() calls bcachefs: Fix loop restart in bch2_btree_transactions_read() bcachefs: Fix bch2_read_retry_nodecode() bcachefs: Don't use the new_fs() bucket alloc path on an initialized fs bcachefs: Fix shift greater than integer size bcachefs: Change bch2_fs_journal_stop() BUG_ON() to warning ...
This commit is contained in:
commit
f6963ab4b0
@ -3,6 +3,7 @@
|
||||
#include "alloc_background.h"
|
||||
#include "alloc_foreground.h"
|
||||
#include "backpointers.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_cache.h"
|
||||
#include "btree_io.h"
|
||||
#include "btree_key_cache.h"
|
||||
@ -1553,13 +1554,13 @@ err:
|
||||
}
|
||||
|
||||
static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
struct btree_iter *alloc_iter)
|
||||
struct btree_iter *alloc_iter,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter lru_iter;
|
||||
struct bch_alloc_v4 a_convert;
|
||||
const struct bch_alloc_v4 *a;
|
||||
struct bkey_s_c alloc_k, lru_k;
|
||||
struct bkey_s_c alloc_k;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
int ret;
|
||||
|
||||
@ -1573,6 +1574,14 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
|
||||
a = bch2_alloc_to_v4(alloc_k, &a_convert);
|
||||
|
||||
if (a->fragmentation_lru) {
|
||||
ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START,
|
||||
a->fragmentation_lru,
|
||||
alloc_k, last_flushed);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (a->data_type != BCH_DATA_cached)
|
||||
return 0;
|
||||
|
||||
@ -1597,41 +1606,30 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
|
||||
a = &a_mut->v;
|
||||
}
|
||||
|
||||
lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
|
||||
lru_pos(alloc_k.k->p.inode,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
a->io_time[READ]), 0);
|
||||
ret = bkey_err(lru_k);
|
||||
ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ],
|
||||
alloc_k, last_flushed);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
|
||||
alloc_key_to_missing_lru_entry,
|
||||
"missing lru entry\n"
|
||||
" %s",
|
||||
(printbuf_reset(&buf),
|
||||
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
|
||||
ret = bch2_lru_set(trans,
|
||||
alloc_k.k->p.inode,
|
||||
bucket_to_u64(alloc_k.k->p),
|
||||
a->io_time[READ]);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
goto err;
|
||||
err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &lru_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
|
||||
{
|
||||
struct bkey_buf last_flushed;
|
||||
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
|
||||
POS_MIN, BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter)));
|
||||
bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed)));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, c);
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
@ -1703,6 +1703,7 @@ void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||
nr[c->open_buckets[i].data_type]++;
|
||||
|
||||
printbuf_tabstops_reset(out);
|
||||
printbuf_tabstop_push(out, 24);
|
||||
|
||||
percpu_down_read(&c->mark_lock);
|
||||
@ -1736,6 +1737,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
|
||||
nr[c->open_buckets[i].data_type]++;
|
||||
|
||||
printbuf_tabstops_reset(out);
|
||||
printbuf_tabstop_push(out, 12);
|
||||
printbuf_tabstop_push(out, 16);
|
||||
printbuf_tabstop_push(out, 16);
|
||||
|
@ -434,13 +434,6 @@ int bch2_check_btree_backpointers(struct bch_fs *c)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
|
||||
{
|
||||
return bpos_eq(l.k->p, r.k->p) &&
|
||||
bkey_bytes(l.k) == bkey_bytes(r.k) &&
|
||||
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
|
||||
}
|
||||
|
||||
struct extents_to_bp_state {
|
||||
struct bpos bucket_start;
|
||||
struct bpos bucket_end;
|
||||
@ -536,11 +529,8 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
struct btree_iter other_extent_iter = {};
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct bkey_s_c bp_k;
|
||||
struct bkey_buf tmp;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
|
||||
if (!ca) {
|
||||
prt_str(&buf, "extent for nonexistent device:bucket ");
|
||||
@ -565,22 +555,9 @@ static int check_bp_exists(struct btree_trans *trans,
|
||||
|
||||
if (bp_k.k->type != KEY_TYPE_backpointer ||
|
||||
memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
|
||||
bch2_bkey_buf_reassemble(&tmp, c, orig_k);
|
||||
|
||||
if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
|
||||
if (bp.level) {
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
}
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
|
||||
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
goto out;
|
||||
}
|
||||
ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
goto check_existing_bp;
|
||||
}
|
||||
@ -589,7 +566,6 @@ err:
|
||||
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &other_extent_iter);
|
||||
bch2_trans_iter_exit(trans, &bp_iter);
|
||||
bch2_bkey_buf_exit(&tmp, c);
|
||||
bch2_dev_put(ca);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
@ -794,6 +770,8 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
|
||||
!((1U << btree) & btree_interior_mask))
|
||||
continue;
|
||||
|
||||
bch2_trans_begin(trans);
|
||||
|
||||
__for_each_btree_node(trans, iter, btree,
|
||||
btree == start.btree ? start.pos : POS_MIN,
|
||||
0, depth, BTREE_ITER_prefetch, b, ret) {
|
||||
@ -905,7 +883,7 @@ static int check_one_backpointer(struct btree_trans *trans,
|
||||
struct bbpos start,
|
||||
struct bbpos end,
|
||||
struct bkey_s_c_backpointer bp,
|
||||
struct bpos *last_flushed_pos)
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct btree_iter iter;
|
||||
@ -925,20 +903,18 @@ static int check_one_backpointer(struct btree_trans *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
|
||||
*last_flushed_pos = bp.k->p;
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans) ?:
|
||||
-BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
goto out;
|
||||
}
|
||||
if (!k.k) {
|
||||
ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (fsck_err_on(!k.k, c,
|
||||
backpointer_to_missing_ptr,
|
||||
"backpointer for missing %s\n %s",
|
||||
bp.v->level ? "btree node" : "extent",
|
||||
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
|
||||
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
|
||||
goto out;
|
||||
if (fsck_err(c, backpointer_to_missing_ptr,
|
||||
"backpointer for missing %s\n %s",
|
||||
bp.v->level ? "btree node" : "extent",
|
||||
(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
|
||||
ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
fsck_err:
|
||||
@ -951,14 +927,20 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
|
||||
struct bbpos start,
|
||||
struct bbpos end)
|
||||
{
|
||||
struct bpos last_flushed_pos = SPOS_MAX;
|
||||
struct bkey_buf last_flushed;
|
||||
|
||||
return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
||||
bch2_bkey_buf_init(&last_flushed);
|
||||
bkey_init(&last_flushed.k->k);
|
||||
|
||||
int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
|
||||
POS_MIN, BTREE_ITER_prefetch, k,
|
||||
NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
|
||||
check_one_backpointer(trans, start, end,
|
||||
bkey_s_c_to_backpointer(k),
|
||||
&last_flushed_pos));
|
||||
&last_flushed));
|
||||
|
||||
bch2_bkey_buf_exit(&last_flushed, trans->c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_check_backpointers_to_extents(struct bch_fs *c)
|
||||
|
@ -660,8 +660,9 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
|
||||
bch2_bkey_format_field_overflows(f, i)) {
|
||||
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
|
||||
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
|
||||
u64 packed_max = f->bits_per_field[i]
|
||||
? ~((~0ULL << 1) << (f->bits_per_field[i] - 1))
|
||||
unsigned packed_bits = min(64, f->bits_per_field[i]);
|
||||
u64 packed_max = packed_bits
|
||||
? ~((~0ULL << 1) << (packed_bits - 1))
|
||||
: 0;
|
||||
|
||||
prt_printf(err, "field %u too large: %llu + %llu > %llu",
|
||||
|
@ -194,6 +194,13 @@ static inline struct bpos bkey_max(struct bpos l, struct bpos r)
|
||||
return bkey_gt(l, r) ? l : r;
|
||||
}
|
||||
|
||||
/*
 * bkey_and_val_eq - report whether two keys are identical in position, size
 * and value.
 *
 * Returns true only when all three checks below hold: the positions compare
 * equal with bpos_eq(), bkey_bytes() is the same for both keys, and the values
 * are byte-for-byte equal over bkey_val_bytes(l.k) bytes.
 */
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
|
||||
{
|
||||
return bpos_eq(l.k->p, r.k->p) &&
|
||||
/* Size is compared before the values; the memcmp length below comes from l only. */
bkey_bytes(l.k) == bkey_bytes(r.k) &&
|
||||
!memcmp(l.v, r.v, bkey_val_bytes(l.k));
|
||||
}
|
||||
|
||||
void bch2_bpos_swab(struct bpos *);
|
||||
void bch2_bkey_swab_key(const struct bkey_format *, struct bkey_packed *);
|
||||
|
||||
|
@ -903,6 +903,8 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
bch2_dev_usage_update(c, ca, &old_gc, &gc, 0, true);
|
||||
percpu_up_read(&c->mark_lock);
|
||||
|
||||
gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca);
|
||||
|
||||
if (fsck_err_on(new.data_type != gc.data_type, c,
|
||||
alloc_key_data_type_wrong,
|
||||
"bucket %llu:%llu gen %u has wrong data_type"
|
||||
@ -916,23 +918,19 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
#define copy_bucket_field(_errtype, _f) \
|
||||
if (fsck_err_on(new._f != gc._f, c, _errtype, \
|
||||
"bucket %llu:%llu gen %u data type %s has wrong " #_f \
|
||||
": got %u, should be %u", \
|
||||
": got %llu, should be %llu", \
|
||||
iter->pos.inode, iter->pos.offset, \
|
||||
gc.gen, \
|
||||
bch2_data_type_str(gc.data_type), \
|
||||
new._f, gc._f)) \
|
||||
(u64) new._f, (u64) gc._f)) \
|
||||
new._f = gc._f; \
|
||||
|
||||
copy_bucket_field(alloc_key_gen_wrong,
|
||||
gen);
|
||||
copy_bucket_field(alloc_key_dirty_sectors_wrong,
|
||||
dirty_sectors);
|
||||
copy_bucket_field(alloc_key_cached_sectors_wrong,
|
||||
cached_sectors);
|
||||
copy_bucket_field(alloc_key_stripe_wrong,
|
||||
stripe);
|
||||
copy_bucket_field(alloc_key_stripe_redundancy_wrong,
|
||||
stripe_redundancy);
|
||||
copy_bucket_field(alloc_key_gen_wrong, gen);
|
||||
copy_bucket_field(alloc_key_dirty_sectors_wrong, dirty_sectors);
|
||||
copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors);
|
||||
copy_bucket_field(alloc_key_stripe_wrong, stripe);
|
||||
copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy);
|
||||
copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru);
|
||||
#undef copy_bucket_field
|
||||
|
||||
if (!bch2_alloc_v4_cmp(*old, new))
|
||||
@ -946,7 +944,7 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
|
||||
a->v = new;
|
||||
|
||||
/*
|
||||
* The trigger normally makes sure this is set, but we're not running
|
||||
* The trigger normally makes sure these are set, but we're not running
|
||||
* triggers:
|
||||
*/
|
||||
if (a->v.data_type == BCH_DATA_cached && !a->v.io_time[READ])
|
||||
|
@ -1,11 +1,13 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "bcachefs.h"
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_locking.h"
|
||||
#include "btree_update.h"
|
||||
#include "btree_update_interior.h"
|
||||
#include "btree_write_buffer.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
#include "journal.h"
|
||||
#include "journal_io.h"
|
||||
#include "journal_reclaim.h"
|
||||
@ -492,6 +494,41 @@ int bch2_btree_write_buffer_tryflush(struct btree_trans *trans)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * bch2_btree_write_buffer_maybe_flush() - flush the write buffer once per key
 * @trans:		btree transaction the check is running in
 * @referring_k:	key whose reference is currently being checked
 * @last_flushed:	copy of the key the last flush was issued for; updated
 *			here after a successful flush
 *
 * In check and repair code, when checking references to write buffer btrees we
 * need to issue a flush before we have a definitive error: this issues a flush
 * if this is a key we haven't yet checked.
 *
 * Return: 0 when @referring_k equals the key stored in @last_flushed (no flush
 * is issued), -BCH_ERR_transaction_restart_write_buffer_flush after a
 * successful flush, or the error from bch2_btree_write_buffer_flush_sync().
 */
|
||||
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *trans,
|
||||
struct bkey_s_c referring_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_buf tmp;
|
||||
int ret = 0;
|
||||
|
||||
bch2_bkey_buf_init(&tmp);
|
||||
|
||||
/* Only flush when this is not the key we last flushed for. */
if (!bkey_and_val_eq(referring_k, bkey_i_to_s_c(last_flushed->k))) {
|
||||
/*
 * Take a private copy of the key before unlocking/flushing.
 * NOTE(review): presumably referring_k may not stay valid across the
 * unlock and flush below - confirm.
 */
bch2_bkey_buf_reassemble(&tmp, c, referring_k);
|
||||
|
||||
/* Btree pointer keys: drop transaction locks and flush interior updates first. */
if (bkey_is_btree_ptr(referring_k.k)) {
|
||||
bch2_trans_unlock(trans);
|
||||
bch2_btree_interior_updates_flush(c);
|
||||
}
|
||||
|
||||
ret = bch2_btree_write_buffer_flush_sync(trans);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/*
 * Remember this key so that a later call with the same key takes the
 * "already flushed" path and returns 0, then report a restart.
 */
bch2_bkey_buf_copy(last_flushed, c, tmp.k);
|
||||
ret = -BCH_ERR_transaction_restart_write_buffer_flush;
|
||||
}
|
||||
err:
|
||||
/* tmp is released on every path, including the no-flush path. */
bch2_bkey_buf_exit(&tmp, c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void bch2_btree_write_buffer_flush_work(struct work_struct *work)
|
||||
{
|
||||
struct bch_fs *c = container_of(work, struct bch_fs, btree_write_buffer.flush_work);
|
||||
|
@ -23,6 +23,9 @@ int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
|
||||
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
|
||||
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
|
||||
|
||||
struct bkey_buf;
|
||||
int bch2_btree_write_buffer_maybe_flush(struct btree_trans *, struct bkey_s_c, struct bkey_buf *);
|
||||
|
||||
struct journal_keys_to_wb {
|
||||
struct btree_write_buffer_keys *wb;
|
||||
size_t room;
|
||||
|
@ -132,14 +132,9 @@ static struct io_timer *get_expired_timer(struct io_clock *clock,
|
||||
{
|
||||
struct io_timer *ret = NULL;
|
||||
|
||||
spin_lock(&clock->timer_lock);
|
||||
|
||||
if (clock->timers.used &&
|
||||
time_after_eq(now, clock->timers.data[0]->expire))
|
||||
heap_pop(&clock->timers, ret, io_timer_cmp, NULL);
|
||||
|
||||
spin_unlock(&clock->timer_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -148,8 +143,10 @@ void __bch2_increment_clock(struct io_clock *clock, unsigned sectors)
|
||||
struct io_timer *timer;
|
||||
unsigned long now = atomic64_add_return(sectors, &clock->now);
|
||||
|
||||
spin_lock(&clock->timer_lock);
|
||||
while ((timer = get_expired_timer(clock, now)))
|
||||
timer->fn(timer);
|
||||
spin_unlock(&clock->timer_lock);
|
||||
}
|
||||
|
||||
void bch2_io_timers_to_text(struct printbuf *out, struct io_clock *clock)
|
||||
|
@ -5,7 +5,9 @@
|
||||
#include "bkey_buf.h"
|
||||
#include "btree_update.h"
|
||||
#include "buckets.h"
|
||||
#include "compress.h"
|
||||
#include "data_update.h"
|
||||
#include "disk_groups.h"
|
||||
#include "ec.h"
|
||||
#include "error.h"
|
||||
#include "extents.h"
|
||||
@ -454,6 +456,38 @@ static void bch2_update_unwritten_extent(struct btree_trans *trans,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * bch2_data_update_opts_to_text - print the options of a data update to @out.
 *
 * Emits five "label:\tvalue" entries: rewrite ptrs and kill ptrs (both printed
 * via bch2_prt_u64_base2()), target, background compression (taken from
 * @io_opts) and extra replicas. Every entry except the last is followed by a
 * newline.
 */
void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts *data_opts)
|
||||
{
|
||||
/* Tabstop for the "\t" in each label below; pushed without resetting existing tabstops. */
printbuf_tabstop_push(out, 20);
|
||||
prt_str(out, "rewrite ptrs:\t");
|
||||
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "kill ptrs:\t");
|
||||
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "target:\t");
|
||||
bch2_target_to_text(out, c, data_opts->target);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "compression:\t");
|
||||
bch2_compression_opt_to_text(out, background_compression(*io_opts));
|
||||
prt_newline(out);
|
||||
|
||||
/* Last entry: no trailing newline is emitted. */
prt_str(out, "extra replicas:\t");
|
||||
prt_u64(out, data_opts->extra_replicas);
|
||||
}
|
||||
|
||||
/*
 * bch2_data_update_to_text - print a data update to @out.
 *
 * Prints the key stored in m->k, a newline, and then the update's options
 * (m->op.opts and m->data_opts) via bch2_data_update_opts_to_text().
 */
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
|
||||
{
|
||||
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
|
||||
prt_newline(out);
|
||||
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
|
||||
}
|
||||
|
||||
int bch2_extent_drop_ptrs(struct btree_trans *trans,
|
||||
struct btree_iter *iter,
|
||||
struct bkey_s_c k,
|
||||
@ -643,6 +677,16 @@ int bch2_data_update_init(struct btree_trans *trans,
|
||||
if (!(durability_have + durability_removing))
|
||||
m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
|
||||
|
||||
if (!m->op.nr_replicas) {
|
||||
struct printbuf buf = PRINTBUF;
|
||||
|
||||
bch2_data_update_to_text(&buf, m);
|
||||
WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf);
|
||||
printbuf_exit(&buf);
|
||||
ret = -BCH_ERR_data_update_done;
|
||||
goto done;
|
||||
}
|
||||
|
||||
m->op.nr_replicas_required = m->op.nr_replicas;
|
||||
|
||||
if (reserve_sectors) {
|
||||
|
@ -17,6 +17,9 @@ struct data_update_opts {
|
||||
unsigned write_flags;
|
||||
};
|
||||
|
||||
void bch2_data_update_opts_to_text(struct printbuf *, struct bch_fs *,
|
||||
struct bch_io_opts *, struct data_update_opts *);
|
||||
|
||||
struct data_update {
|
||||
/* extent being updated: */
|
||||
enum btree_id btree_id;
|
||||
@ -27,6 +30,8 @@ struct data_update {
|
||||
struct bch_write_op op;
|
||||
};
|
||||
|
||||
void bch2_data_update_to_text(struct printbuf *, struct data_update *);
|
||||
|
||||
int bch2_data_update_index_update(struct bch_write_op *);
|
||||
|
||||
void bch2_data_update_read_done(struct data_update *,
|
||||
|
@ -610,7 +610,7 @@ restart:
|
||||
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
|
||||
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
if ((ulong) trans < i->iter)
|
||||
if ((ulong) trans <= i->iter)
|
||||
continue;
|
||||
|
||||
i->iter = (ulong) trans;
|
||||
@ -832,16 +832,16 @@ static const struct file_operations btree_transaction_stats_op = {
|
||||
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
|
||||
{
|
||||
struct btree_trans *trans;
|
||||
pid_t iter = 0;
|
||||
ulong iter = 0;
|
||||
restart:
|
||||
seqmutex_lock(&c->btree_trans_lock);
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
|
||||
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
|
||||
|
||||
if (!task || task->pid <= iter)
|
||||
list_for_each_entry(trans, &c->btree_trans_list, list) {
|
||||
if ((ulong) trans <= iter)
|
||||
continue;
|
||||
|
||||
iter = task->pid;
|
||||
iter = (ulong) trans;
|
||||
|
||||
if (!closure_get_not_zero(&trans->ref))
|
||||
continue;
|
||||
|
@ -48,7 +48,7 @@ static inline unsigned eytzinger1_right_child(unsigned i)
|
||||
|
||||
static inline unsigned eytzinger1_first(unsigned size)
|
||||
{
|
||||
return rounddown_pow_of_two(size);
|
||||
return size ? rounddown_pow_of_two(size) : 0;
|
||||
}
|
||||
|
||||
static inline unsigned eytzinger1_last(unsigned size)
|
||||
@ -101,7 +101,9 @@ static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
|
||||
|
||||
static inline unsigned eytzinger1_extra(unsigned size)
|
||||
{
|
||||
return (size + 1 - rounddown_pow_of_two(size)) << 1;
|
||||
return size
|
||||
? (size + 1 - rounddown_pow_of_two(size)) << 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
|
||||
|
@ -194,6 +194,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
|
||||
* discard_new_inode() expects it to be set...
|
||||
*/
|
||||
inode->v.i_flags |= I_NEW;
|
||||
/*
|
||||
* We don't want bch2_evict_inode() to delete the inode on disk,
|
||||
* we just raced and had another inode in cache. Normally new
|
||||
* inodes don't have nlink == 0 - except tmpfiles do...
|
||||
*/
|
||||
set_nlink(&inode->v, 1);
|
||||
discard_new_inode(&inode->v);
|
||||
inode = old;
|
||||
} else {
|
||||
@ -2026,6 +2032,8 @@ err_put_super:
|
||||
__bch2_fs_stop(c);
|
||||
deactivate_locked_super(sb);
|
||||
err:
|
||||
if (ret)
|
||||
pr_err("error: %s", bch2_err_str(ret));
|
||||
/*
|
||||
* On an inconsistency error in recovery we might see an -EROFS derived
|
||||
* errorcode (from the journal), but we don't want to return that to
|
||||
@ -2065,7 +2073,8 @@ int __init bch2_vfs_init(void)
|
||||
{
|
||||
int ret = -ENOMEM;
|
||||
|
||||
bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT);
|
||||
bch2_inode_cache = KMEM_CACHE(bch_inode_info, SLAB_RECLAIM_ACCOUNT |
|
||||
SLAB_ACCOUNT);
|
||||
if (!bch2_inode_cache)
|
||||
goto err;
|
||||
|
||||
|
@ -389,7 +389,6 @@ retry:
|
||||
|
||||
bch2_bkey_buf_reassemble(&sk, c, k);
|
||||
k = bkey_i_to_s_c(sk.k);
|
||||
bch2_trans_unlock(trans);
|
||||
|
||||
if (!bch2_bkey_matches_ptr(c, k,
|
||||
rbio->pick.ptr,
|
||||
@ -1004,6 +1003,9 @@ get_bio:
|
||||
rbio->promote = promote;
|
||||
INIT_WORK(&rbio->work, NULL);
|
||||
|
||||
if (flags & BCH_READ_NODECODE)
|
||||
orig->pick = pick;
|
||||
|
||||
rbio->bio.bi_opf = orig->bio.bi_opf;
|
||||
rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
|
||||
rbio->bio.bi_end_io = bch2_read_endio;
|
||||
|
@ -1095,7 +1095,7 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int bch2_dev_journal_alloc(struct bch_dev *ca)
|
||||
int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
|
||||
{
|
||||
unsigned nr;
|
||||
int ret;
|
||||
@ -1117,7 +1117,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca)
|
||||
min(1 << 13,
|
||||
(1 << 24) / ca->mi.bucket_size));
|
||||
|
||||
ret = __bch2_set_nr_journal_buckets(ca, nr, true, NULL);
|
||||
ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
|
||||
err:
|
||||
bch_err_fn(ca, ret);
|
||||
return ret;
|
||||
@ -1129,7 +1129,7 @@ int bch2_fs_journal_alloc(struct bch_fs *c)
|
||||
if (ca->journal.nr)
|
||||
continue;
|
||||
|
||||
int ret = bch2_dev_journal_alloc(ca);
|
||||
int ret = bch2_dev_journal_alloc(ca, true);
|
||||
if (ret) {
|
||||
percpu_ref_put(&ca->io_ref);
|
||||
return ret;
|
||||
@ -1184,9 +1184,11 @@ void bch2_fs_journal_stop(struct journal *j)
|
||||
journal_quiesce(j);
|
||||
cancel_delayed_work_sync(&j->write_work);
|
||||
|
||||
BUG_ON(!bch2_journal_error(j) &&
|
||||
test_bit(JOURNAL_replay_done, &j->flags) &&
|
||||
j->last_empty_seq != journal_cur_seq(j));
|
||||
WARN(!bch2_journal_error(j) &&
|
||||
test_bit(JOURNAL_replay_done, &j->flags) &&
|
||||
j->last_empty_seq != journal_cur_seq(j),
|
||||
"journal shutdown error: cur seq %llu but last empty seq %llu",
|
||||
journal_cur_seq(j), j->last_empty_seq);
|
||||
|
||||
if (!bch2_journal_error(j))
|
||||
clear_bit(JOURNAL_running, &j->flags);
|
||||
@ -1418,8 +1420,8 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
||||
unsigned long now = jiffies;
|
||||
u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
|
||||
|
||||
if (!out->nr_tabstops)
|
||||
printbuf_tabstop_push(out, 28);
|
||||
printbuf_tabstops_reset(out);
|
||||
printbuf_tabstop_push(out, 28);
|
||||
out->atomic++;
|
||||
|
||||
rcu_read_lock();
|
||||
|
@ -433,7 +433,7 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
|
||||
|
||||
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
|
||||
unsigned nr);
|
||||
int bch2_dev_journal_alloc(struct bch_dev *);
|
||||
int bch2_dev_journal_alloc(struct bch_dev *, bool);
|
||||
int bch2_fs_journal_alloc(struct bch_fs *);
|
||||
|
||||
void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
|
||||
|
@ -415,6 +415,8 @@ static int journal_entry_btree_keys_validate(struct bch_fs *c,
|
||||
flags|BCH_VALIDATE_journal);
|
||||
if (ret == FSCK_DELETED_KEY)
|
||||
continue;
|
||||
else if (ret)
|
||||
return ret;
|
||||
|
||||
k = bkey_next(k);
|
||||
}
|
||||
@ -1762,11 +1764,13 @@ static CLOSURE_CALLBACK(journal_write_preflush)
|
||||
|
||||
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
|
||||
spin_lock(&j->lock);
|
||||
closure_wait(&j->async_wait, cl);
|
||||
if (j->seq_ondisk + 1 != le64_to_cpu(w->data->seq)) {
|
||||
closure_wait(&j->async_wait, cl);
|
||||
spin_unlock(&j->lock);
|
||||
continue_at(cl, journal_write_preflush, j->wq);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&j->lock);
|
||||
|
||||
continue_at(cl, journal_write_preflush, j->wq);
|
||||
return;
|
||||
}
|
||||
|
||||
if (w->separate_flush) {
|
||||
|
@ -77,6 +77,45 @@ static const char * const bch2_lru_types[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
/*
 * bch2_lru_check_set - check that the lru entry a key refers to exists, and
 * create it if the fsck error is accepted.
 *
 * Looks up BTREE_ID_lru at lru_pos(lru_id, bucket_to_u64(referring_k.k->p),
 * time). If the key found there is not KEY_TYPE_set, the write buffer is
 * flushed (at most once per referring key, tracked through @last_flushed)
 * before the missing entry is reported; if fsck_err() evaluates true the entry
 * is created with bch2_lru_set().
 *
 * Returns 0 on success, otherwise the error from the lookup, from
 * bch2_btree_write_buffer_maybe_flush() or from bch2_lru_set().
 */
int bch2_lru_check_set(struct btree_trans *trans,
|
||||
u16 lru_id, u64 time,
|
||||
struct bkey_s_c referring_k,
|
||||
struct bkey_buf *last_flushed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct btree_iter lru_iter;
|
||||
struct bkey_s_c lru_k =
|
||||
bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
|
||||
lru_pos(lru_id,
|
||||
bucket_to_u64(referring_k.k->p),
|
||||
time), 0);
|
||||
int ret = bkey_err(lru_k);
|
||||
/* A failed lookup is returned directly, bypassing the cleanup labels below. */
if (ret)
|
||||
return ret;
|
||||
|
||||
if (lru_k.k->type != KEY_TYPE_set) {
|
||||
/*
 * Flush the write buffer before treating the entry as missing; a
 * nonzero return (including the restart code issued after a flush)
 * goes straight to cleanup.
 */
ret = bch2_btree_write_buffer_maybe_flush(trans, referring_k, last_flushed);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* First %s: lru type name for the looked-up position; second %s: text of the referring key. */
if (fsck_err(c, alloc_key_to_missing_lru_entry,
|
||||
"missing %s lru entry\n"
|
||||
" %s",
|
||||
bch2_lru_types[lru_type(lru_k)],
|
||||
(bch2_bkey_val_to_text(&buf, c, referring_k), buf.buf))) {
|
||||
ret = bch2_lru_set(trans, lru_id, bucket_to_u64(referring_k.k->p), time);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
err:
|
||||
/* NOTE(review): the fsck_err label is presumably the jump target of the fsck_err() macro - confirm. */
fsck_err:
|
||||
bch2_trans_iter_exit(trans, &lru_iter);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch2_check_lru_key(struct btree_trans *trans,
|
||||
struct btree_iter *lru_iter,
|
||||
struct bkey_s_c lru_k,
|
||||
|
@ -61,6 +61,9 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
|
||||
int bch2_lru_set(struct btree_trans *, u16, u64, u64);
|
||||
int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
|
||||
|
||||
struct bkey_buf;
|
||||
int bch2_lru_check_set(struct btree_trans *, u16, u64, struct bkey_s_c, struct bkey_buf *);
|
||||
|
||||
int bch2_check_lrus(struct bch_fs *);
|
||||
|
||||
#endif /* _BCACHEFS_LRU_H */
|
||||
|
@ -36,31 +36,6 @@ const char * const bch2_data_ops_strs[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
static void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts *data_opts)
|
||||
{
|
||||
printbuf_tabstop_push(out, 20);
|
||||
prt_str(out, "rewrite ptrs:\t");
|
||||
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "kill ptrs:\t");
|
||||
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "target:\t");
|
||||
bch2_target_to_text(out, c, data_opts->target);
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "compression:\t");
|
||||
bch2_compression_opt_to_text(out, background_compression(*io_opts));
|
||||
prt_newline(out);
|
||||
|
||||
prt_str(out, "extra replicas:\t");
|
||||
prt_u64(out, data_opts->extra_replicas);
|
||||
}
|
||||
|
||||
static void trace_move_extent2(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bch_io_opts *io_opts,
|
||||
struct data_update_opts *data_opts)
|
||||
|
@ -286,7 +286,8 @@ enum bch_fsck_flags {
|
||||
x(accounting_mismatch, 272, 0) \
|
||||
x(accounting_replicas_not_marked, 273, 0) \
|
||||
x(invalid_btree_id, 274, 0) \
|
||||
x(alloc_key_io_time_bad, 275, 0)
|
||||
x(alloc_key_io_time_bad, 275, 0) \
|
||||
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
|
||||
|
@ -563,8 +563,11 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
BUG_ON(atomic_read(&c->journal_keys.ref));
|
||||
bch2_fs_btree_write_buffer_exit(c);
|
||||
percpu_free_rwsem(&c->mark_lock);
|
||||
EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
|
||||
free_percpu(c->online_reserved);
|
||||
if (c->online_reserved) {
|
||||
u64 v = percpu_u64_get(c->online_reserved);
|
||||
WARN(v, "online_reserved not 0 at shutdown: %lli", v);
|
||||
free_percpu(c->online_reserved);
|
||||
}
|
||||
|
||||
darray_exit(&c->btree_roots_extra);
|
||||
free_percpu(c->pcpu);
|
||||
@ -1769,7 +1772,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ret = bch2_dev_journal_alloc(ca);
|
||||
ret = bch2_dev_journal_alloc(ca, true);
|
||||
bch_err_msg(c, ret, "allocating journal");
|
||||
if (ret)
|
||||
goto err;
|
||||
@ -1929,7 +1932,7 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
|
||||
}
|
||||
|
||||
if (!ca->journal.nr) {
|
||||
ret = bch2_dev_journal_alloc(ca);
|
||||
ret = bch2_dev_journal_alloc(ca, false);
|
||||
bch_err_msg(ca, ret, "allocating journal");
|
||||
if (ret)
|
||||
goto err;
|
||||
|
@ -159,6 +159,7 @@ struct closure {
|
||||
#ifdef CONFIG_DEBUG_CLOSURES
|
||||
#define CLOSURE_MAGIC_DEAD 0xc054dead
|
||||
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
|
||||
#define CLOSURE_MAGIC_STACK 0xc05451cc
|
||||
|
||||
unsigned int magic;
|
||||
struct list_head all;
|
||||
@ -323,12 +324,18 @@ static inline void closure_init_stack(struct closure *cl)
|
||||
{
|
||||
memset(cl, 0, sizeof(struct closure));
|
||||
atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
|
||||
#ifdef CONFIG_DEBUG_CLOSURES
|
||||
cl->magic = CLOSURE_MAGIC_STACK;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * closure_init_stack_release - initialize a closure, setting its remaining
 * count with atomic_set_release().
 *
 * Zeroes the whole structure, then stores CLOSURE_REMAINING_INITIALIZER into
 * cl->remaining. With CONFIG_DEBUG_CLOSURES the closure is also tagged with
 * CLOSURE_MAGIC_STACK.
 */
static inline void closure_init_stack_release(struct closure *cl)
|
||||
{
|
||||
memset(cl, 0, sizeof(struct closure));
|
||||
atomic_set_release(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
|
||||
/* Debug builds only: mark the closure with the stack magic value. */
#ifdef CONFIG_DEBUG_CLOSURES
|
||||
cl->magic = CLOSURE_MAGIC_STACK;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -244,6 +244,9 @@ void closure_debug_destroy(struct closure *cl)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (cl->magic == CLOSURE_MAGIC_STACK)
|
||||
return;
|
||||
|
||||
BUG_ON(cl->magic != CLOSURE_MAGIC_ALIVE);
|
||||
cl->magic = CLOSURE_MAGIC_DEAD;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user