bcachefs: Improvements to the journal read error paths

- Print out more information in error messages
 - On checksum error, keep the journal entry but mark it bad so that we
   can prefer entries from other devices that don't have bad checksums

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2020-08-24 15:58:26 -04:00 committed by Kent Overstreet
parent a672fb8f5d
commit ca73852a13
2 changed files with 61 additions and 27 deletions

View File

@ -28,9 +28,11 @@ struct journal_list {
* be replayed:
*/
static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
struct journal_list *jlist, struct jset *j)
struct journal_list *jlist, struct jset *j,
bool bad)
{
struct journal_replay *i, *pos;
struct bch_devs_list devs = { .nr = 0 };
struct list_head *where;
size_t bytes = vstruct_bytes(j);
__le64 last_seq;
@ -59,15 +61,6 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
}
list_for_each_entry_reverse(i, jlist->head, list) {
/* Duplicate? */
if (le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
fsck_err_on(bytes != vstruct_bytes(&i->j) ||
memcmp(j, &i->j, bytes), c,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
goto found;
}
if (le64_to_cpu(j->seq) > le64_to_cpu(i->j.seq)) {
where = &i->list;
goto add;
@ -76,6 +69,32 @@ static int journal_entry_add(struct bch_fs *c, struct bch_dev *ca,
where = jlist->head;
add:
i = where->next != jlist->head
? container_of(where->next, struct journal_replay, list)
: NULL;
/*
* Duplicate journal entries? If so we want the one that didn't have a
* checksum error:
*/
if (i && le64_to_cpu(j->seq) == le64_to_cpu(i->j.seq)) {
if (i->bad) {
devs = i->devs;
list_del(&i->list);
kvpfree(i, offsetof(struct journal_replay, j) +
vstruct_bytes(&i->j));
} else if (bad) {
goto found;
} else {
fsck_err_on(bytes != vstruct_bytes(&i->j) ||
memcmp(j, &i->j, bytes), c,
"found duplicate but non identical journal entries (seq %llu)",
le64_to_cpu(j->seq));
goto found;
}
}
i = kvpmalloc(offsetof(struct journal_replay, j) + bytes, GFP_KERNEL);
if (!i) {
ret = -ENOMEM;
@ -83,7 +102,8 @@ add:
}
list_add(&i->list, where);
i->devs.nr = 0;
i->devs = devs;
i->bad = bad;
unsafe_memcpy(&i->j, j, bytes, "embedded variable length struct");
found:
if (!bch2_dev_list_has_dev(i->devs, ca->dev_idx))
@ -390,6 +410,7 @@ fsck_err:
}
static int jset_validate(struct bch_fs *c,
struct bch_dev *ca,
struct jset *jset, u64 sector,
unsigned bucket_sectors_left,
unsigned sectors_read,
@ -404,16 +425,19 @@ static int jset_validate(struct bch_fs *c,
return JOURNAL_ENTRY_NONE;
version = le32_to_cpu(jset->version);
if ((version != BCH_JSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max) {
bch_err(c, "unknown journal entry version %u", jset->version);
return BCH_FSCK_UNKNOWN_VERSION;
if (journal_entry_err_on((version != BCH_JSET_VERSION_OLD &&
version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max, c,
"%s sector %llu seq %llu: unknown journal entry version %u",
ca->name, sector, le64_to_cpu(jset->seq),
version)) {
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}
if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
"journal entry too big (%zu bytes), sector %lluu",
bytes, sector)) {
"%s sector %llu seq %llu: journal entry too big (%zu bytes)",
ca->name, sector, le64_to_cpu(jset->seq), bytes)) {
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
}
@ -422,13 +446,15 @@ static int jset_validate(struct bch_fs *c,
return JOURNAL_ENTRY_REREAD;
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
"journal entry with unknown csum type %llu sector %lluu",
JSET_CSUM_TYPE(jset), sector))
"%s sector %llu seq %llu: journal entry with unknown csum type %llu",
ca->name, sector, le64_to_cpu(jset->seq),
JSET_CSUM_TYPE(jset)))
return JOURNAL_ENTRY_BAD;
csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
"journal checksum bad, sector %llu", sector)) {
"%s sector %llu seq %llu: journal checksum bad",
ca->name, sector, le64_to_cpu(jset->seq))) {
/* XXX: retry IO, when we start retrying checksum errors */
/* XXX: note we might have missing journal entries */
return JOURNAL_ENTRY_BAD;
@ -439,8 +465,10 @@ static int jset_validate(struct bch_fs *c,
vstruct_end(jset) - (void *) jset->encrypted_start);
if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
"invalid journal entry: last_seq > seq"))
"invalid journal entry: last_seq > seq")) {
jset->last_seq = jset->seq;
return JOURNAL_ENTRY_BAD;
}
return 0;
fsck_err:
@ -515,11 +543,12 @@ reread:
j = buf->data;
}
ret = jset_validate(c, j, offset,
ret = jset_validate(c, ca, j, offset,
end - offset, sectors_read,
READ);
switch (ret) {
case BCH_FSCK_OK:
sectors = vstruct_sectors(j, c->block_bits);
break;
case JOURNAL_ENTRY_REREAD:
if (vstruct_bytes(j) > buf->size) {
@ -536,8 +565,13 @@ reread:
goto next_block;
case JOURNAL_ENTRY_BAD:
saw_bad = true;
/*
* On checksum error we don't really trust the size
* field of the journal entry we read, so try reading
* again at next block boundary:
*/
sectors = c->opts.block_size;
goto next_block;
break;
default:
return ret;
}
@ -554,7 +588,7 @@ reread:
ja->bucket_seq[bucket] = le64_to_cpu(j->seq);
mutex_lock(&jlist->lock);
ret = journal_entry_add(c, ca, jlist, j);
ret = journal_entry_add(c, ca, jlist, j, ret != 0);
mutex_unlock(&jlist->lock);
switch (ret) {
@ -565,8 +599,6 @@ reread:
default:
return ret;
}
sectors = vstruct_sectors(j, c->block_bits);
next_block:
pr_debug("next");
offset += sectors;

View File

@ -9,6 +9,8 @@
struct journal_replay {
struct list_head list;
struct bch_devs_list devs;
/* checksum error, but we may want to try using it anyways: */
bool bad;
/* must be last: */
struct jset j;
};