mirror of
https://github.com/torvalds/linux.git
synced 2024-11-22 04:02:20 +00:00
bcachefs: BTREE_ITER_WITH_JOURNAL
This adds a new btree iterator flag, BTREE_ITER_WITH_JOURNAL, that is automatically enabled when initializing a btree iterator before journal replay has completed - it overlays the contents of the journal with the btree. This lets us delete bch2_btree_and_journal_walk() and just use the normal btree iterator interface instead - which also lets us delete a significant amount of duplicated code. Note that BTREE_ITER_WITH_JOURNAL is still unoptimized in this patch - we're redoing the binary search over keys in the journal every time we call bch2_btree_iter_peek(). Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
f28620c108
commit
5222a4607c
@ -340,46 +340,46 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c,
|
|||||||
#undef x
|
#undef x
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct bch_dev *ca;
|
|
||||||
struct bucket *g;
|
|
||||||
struct bkey_alloc_unpacked u;
|
|
||||||
|
|
||||||
if (!bkey_is_alloc(k.k))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
|
||||||
g = bucket(ca, k.k->p.offset);
|
|
||||||
u = bch2_alloc_unpack(k);
|
|
||||||
|
|
||||||
*bucket_gen(ca, k.k->p.offset) = u.gen;
|
|
||||||
g->_mark.gen = u.gen;
|
|
||||||
g->_mark.data_type = u.data_type;
|
|
||||||
g->_mark.dirty_sectors = u.dirty_sectors;
|
|
||||||
g->_mark.cached_sectors = u.cached_sectors;
|
|
||||||
g->_mark.stripe = u.stripe != 0;
|
|
||||||
g->stripe = u.stripe;
|
|
||||||
g->stripe_redundancy = u.stripe_redundancy;
|
|
||||||
g->io_time[READ] = u.read_time;
|
|
||||||
g->io_time[WRITE] = u.write_time;
|
|
||||||
g->oldest_gen = u.oldest_gen;
|
|
||||||
g->gen_valid = 1;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bch2_alloc_read(struct bch_fs *c)
|
int bch2_alloc_read(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
|
struct btree_iter iter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
struct bch_dev *ca;
|
||||||
|
struct bucket *g;
|
||||||
|
struct bkey_alloc_unpacked u;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
down_read(&c->gc_lock);
|
down_read(&c->gc_lock);
|
||||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_alloc, bch2_alloc_read_fn);
|
|
||||||
|
for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
|
||||||
|
BTREE_ITER_PREFETCH, k, ret) {
|
||||||
|
if (!bkey_is_alloc(k.k))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ca = bch_dev_bkey_exists(c, k.k->p.inode);
|
||||||
|
g = bucket(ca, k.k->p.offset);
|
||||||
|
u = bch2_alloc_unpack(k);
|
||||||
|
|
||||||
|
*bucket_gen(ca, k.k->p.offset) = u.gen;
|
||||||
|
g->_mark.gen = u.gen;
|
||||||
|
g->_mark.data_type = u.data_type;
|
||||||
|
g->_mark.dirty_sectors = u.dirty_sectors;
|
||||||
|
g->_mark.cached_sectors = u.cached_sectors;
|
||||||
|
g->_mark.stripe = u.stripe != 0;
|
||||||
|
g->stripe = u.stripe;
|
||||||
|
g->stripe_redundancy = u.stripe_redundancy;
|
||||||
|
g->io_time[READ] = u.read_time;
|
||||||
|
g->io_time[WRITE] = u.write_time;
|
||||||
|
g->oldest_gen = u.oldest_gen;
|
||||||
|
g->gen_valid = 1;
|
||||||
|
}
|
||||||
|
bch2_trans_iter_exit(&trans, &iter);
|
||||||
|
|
||||||
up_read(&c->gc_lock);
|
up_read(&c->gc_lock);
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_exit(&trans);
|
||||||
|
|
||||||
if (ret) {
|
if (ret) {
|
||||||
bch_err(c, "error reading alloc info: %i", ret);
|
bch_err(c, "error reading alloc info: %i", ret);
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -860,7 +860,6 @@ mempool_t bio_bounce_pages;
|
|||||||
u64 reflink_hint;
|
u64 reflink_hint;
|
||||||
reflink_gc_table reflink_gc_table;
|
reflink_gc_table reflink_gc_table;
|
||||||
size_t reflink_gc_nr;
|
size_t reflink_gc_nr;
|
||||||
size_t reflink_gc_idx;
|
|
||||||
|
|
||||||
/* VFS IO PATH - fs-io.c */
|
/* VFS IO PATH - fs-io.c */
|
||||||
struct bio_set writepage_bioset;
|
struct bio_set writepage_bioset;
|
||||||
|
@ -1342,59 +1342,6 @@ static int bch2_gc_start(struct bch_fs *c,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_gc_reflink_done_initial_fn(struct btree_trans *trans,
|
|
||||||
struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct reflink_gc *r;
|
|
||||||
const __le64 *refcount = bkey_refcount_c(k);
|
|
||||||
char buf[200];
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (!refcount)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
r = genradix_ptr(&c->reflink_gc_table, c->reflink_gc_idx++);
|
|
||||||
if (!r)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
if (!r ||
|
|
||||||
r->offset != k.k->p.offset ||
|
|
||||||
r->size != k.k->size) {
|
|
||||||
bch_err(c, "unexpected inconsistency walking reflink table at gc finish");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fsck_err_on(r->refcount != le64_to_cpu(*refcount), c,
|
|
||||||
"reflink key has wrong refcount:\n"
|
|
||||||
" %s\n"
|
|
||||||
" should be %u",
|
|
||||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
|
||||||
r->refcount)) {
|
|
||||||
struct bkey_i *new;
|
|
||||||
|
|
||||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
|
||||||
if (!new) {
|
|
||||||
ret = -ENOMEM;
|
|
||||||
goto fsck_err;
|
|
||||||
}
|
|
||||||
|
|
||||||
bkey_reassemble(new, k);
|
|
||||||
|
|
||||||
if (!r->refcount) {
|
|
||||||
new->k.type = KEY_TYPE_deleted;
|
|
||||||
new->k.size = 0;
|
|
||||||
} else {
|
|
||||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new);
|
|
||||||
kfree(new);
|
|
||||||
}
|
|
||||||
fsck_err:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
||||||
bool metadata_only)
|
bool metadata_only)
|
||||||
{
|
{
|
||||||
@ -1411,14 +1358,6 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
|||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
|
|
||||||
if (initial) {
|
|
||||||
c->reflink_gc_idx = 0;
|
|
||||||
|
|
||||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
|
||||||
bch2_gc_reflink_done_initial_fn);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||||
BTREE_ITER_PREFETCH, k, ret) {
|
BTREE_ITER_PREFETCH, k, ret) {
|
||||||
const __le64 *refcount = bkey_refcount_c(k);
|
const __le64 *refcount = bkey_refcount_c(k);
|
||||||
@ -1426,7 +1365,7 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
|||||||
if (!refcount)
|
if (!refcount)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
r = genradix_ptr(&c->reflink_gc_table, idx);
|
r = genradix_ptr(&c->reflink_gc_table, idx++);
|
||||||
if (!r ||
|
if (!r ||
|
||||||
r->offset != k.k->p.offset ||
|
r->offset != k.k->p.offset ||
|
||||||
r->size != k.k->size) {
|
r->size != k.k->size) {
|
||||||
@ -1456,7 +1395,9 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
|||||||
else
|
else
|
||||||
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
*bkey_refcount(new) = cpu_to_le64(r->refcount);
|
||||||
|
|
||||||
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
|
ret = initial
|
||||||
|
? bch2_journal_key_insert(c, BTREE_ID_reflink, 0, new) /* reflink key: must go to the reflink btree, same as the non-initial branch */
|
||||||
|
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||||
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
|
__bch2_btree_insert(&trans, BTREE_ID_reflink, new));
|
||||||
kfree(new);
|
kfree(new);
|
||||||
|
|
||||||
@ -1466,64 +1407,21 @@ static int bch2_gc_reflink_done(struct bch_fs *c, bool initial,
|
|||||||
}
|
}
|
||||||
fsck_err:
|
fsck_err:
|
||||||
bch2_trans_iter_exit(&trans, &iter);
|
bch2_trans_iter_exit(&trans, &iter);
|
||||||
out:
|
|
||||||
c->reflink_gc_nr = 0;
|
c->reflink_gc_nr = 0;
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_exit(&trans);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_gc_stripes_done_initial_fn(struct btree_trans *trans,
|
|
||||||
struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct gc_stripe *m;
|
|
||||||
const struct bch_stripe *s;
|
|
||||||
char buf[200];
|
|
||||||
unsigned i;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (k.k->type != KEY_TYPE_stripe)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
s = bkey_s_c_to_stripe(k).v;
|
|
||||||
|
|
||||||
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
|
||||||
|
|
||||||
for (i = 0; i < s->nr_blocks; i++)
|
|
||||||
if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
|
|
||||||
goto inconsistent;
|
|
||||||
return 0;
|
|
||||||
inconsistent:
|
|
||||||
if (fsck_err_on(true, c,
|
|
||||||
"stripe has wrong block sector count %u:\n"
|
|
||||||
" %s\n"
|
|
||||||
" should be %u", i,
|
|
||||||
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
|
||||||
m ? m->block_sectors[i] : 0)) {
|
|
||||||
struct bkey_i_stripe *new;
|
|
||||||
|
|
||||||
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
|
||||||
if (!new) {
|
|
||||||
ret = -ENOMEM;
|
|
||||||
goto fsck_err;
|
|
||||||
}
|
|
||||||
|
|
||||||
bkey_reassemble(&new->k_i, k);
|
|
||||||
|
|
||||||
for (i = 0; i < new->v.nr_blocks; i++)
|
|
||||||
stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
|
|
||||||
|
|
||||||
ret = bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i);
|
|
||||||
kfree(new);
|
|
||||||
}
|
|
||||||
fsck_err:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
||||||
bool metadata_only)
|
bool metadata_only)
|
||||||
{
|
{
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
|
struct btree_iter iter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
struct gc_stripe *m;
|
||||||
|
const struct bch_stripe *s;
|
||||||
|
char buf[200];
|
||||||
|
unsigned i;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (metadata_only)
|
if (metadata_only)
|
||||||
@ -1531,39 +1429,52 @@ static int bch2_gc_stripes_done(struct bch_fs *c, bool initial,
|
|||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
|
|
||||||
if (initial) {
|
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
BTREE_ITER_PREFETCH, k, ret) {
|
||||||
bch2_gc_stripes_done_initial_fn);
|
if (k.k->type != KEY_TYPE_stripe)
|
||||||
} else {
|
continue;
|
||||||
BUG();
|
|
||||||
|
s = bkey_s_c_to_stripe(k).v;
|
||||||
|
m = genradix_ptr(&c->gc_stripes, k.k->p.offset);
|
||||||
|
|
||||||
|
for (i = 0; i < s->nr_blocks; i++)
|
||||||
|
if (stripe_blockcount_get(s, i) != (m ? m->block_sectors[i] : 0))
|
||||||
|
goto inconsistent;
|
||||||
|
continue;
|
||||||
|
inconsistent:
|
||||||
|
if (fsck_err_on(true, c,
|
||||||
|
"stripe has wrong block sector count %u:\n"
|
||||||
|
" %s\n"
|
||||||
|
" should be %u", i,
|
||||||
|
(bch2_bkey_val_to_text(&PBUF(buf), c, k), buf),
|
||||||
|
m ? m->block_sectors[i] : 0)) {
|
||||||
|
struct bkey_i_stripe *new;
|
||||||
|
|
||||||
|
new = kmalloc(bkey_bytes(k.k), GFP_KERNEL);
|
||||||
|
if (!new) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
bkey_reassemble(&new->k_i, k);
|
||||||
|
|
||||||
|
for (i = 0; i < new->v.nr_blocks; i++)
|
||||||
|
stripe_blockcount_set(&new->v, i, m ? m->block_sectors[i] : 0);
|
||||||
|
|
||||||
|
ret = initial
|
||||||
|
? bch2_journal_key_insert(c, BTREE_ID_stripes, 0, &new->k_i)
|
||||||
|
: __bch2_trans_do(&trans, NULL, NULL, 0,
|
||||||
|
__bch2_btree_insert(&trans, BTREE_ID_reflink, &new->k_i));
|
||||||
|
kfree(new);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
fsck_err:
|
||||||
|
bch2_trans_iter_exit(&trans, &iter);
|
||||||
|
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_exit(&trans);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_gc_reflink_start_initial_fn(struct btree_trans *trans,
|
|
||||||
struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct reflink_gc *r;
|
|
||||||
const __le64 *refcount = bkey_refcount_c(k);
|
|
||||||
|
|
||||||
if (!refcount)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
r = genradix_ptr_alloc(&c->reflink_gc_table, c->reflink_gc_nr++,
|
|
||||||
GFP_KERNEL);
|
|
||||||
if (!r)
|
|
||||||
return -ENOMEM;
|
|
||||||
|
|
||||||
r->offset = k.k->p.offset;
|
|
||||||
r->size = k.k->size;
|
|
||||||
r->refcount = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
||||||
bool metadata_only)
|
bool metadata_only)
|
||||||
{
|
{
|
||||||
@ -1579,12 +1490,6 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
|||||||
bch2_trans_init(&trans, c, 0, 0);
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
c->reflink_gc_nr = 0;
|
c->reflink_gc_nr = 0;
|
||||||
|
|
||||||
if (initial) {
|
|
||||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_reflink,
|
|
||||||
bch2_gc_reflink_start_initial_fn);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
for_each_btree_key(&trans, iter, BTREE_ID_reflink, POS_MIN,
|
||||||
BTREE_ITER_PREFETCH, k, ret) {
|
BTREE_ITER_PREFETCH, k, ret) {
|
||||||
const __le64 *refcount = bkey_refcount_c(k);
|
const __le64 *refcount = bkey_refcount_c(k);
|
||||||
@ -1604,7 +1509,7 @@ static int bch2_gc_reflink_start(struct bch_fs *c, bool initial,
|
|||||||
r->refcount = 0;
|
r->refcount = 0;
|
||||||
}
|
}
|
||||||
bch2_trans_iter_exit(&trans, &iter);
|
bch2_trans_iter_exit(&trans, &iter);
|
||||||
out:
|
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_exit(&trans);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "extents.h"
|
#include "extents.h"
|
||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
|
#include "recovery.h"
|
||||||
#include "replicas.h"
|
#include "replicas.h"
|
||||||
#include "subvolume.h"
|
#include "subvolume.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
@ -1064,6 +1065,7 @@ static inline bool btree_path_advance_to_pos(struct btree_path *path,
|
|||||||
static void btree_path_verify_new_node(struct btree_trans *trans,
|
static void btree_path_verify_new_node(struct btree_trans *trans,
|
||||||
struct btree_path *path, struct btree *b)
|
struct btree_path *path, struct btree *b)
|
||||||
{
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_path_level *l;
|
struct btree_path_level *l;
|
||||||
unsigned plevel;
|
unsigned plevel;
|
||||||
bool parent_locked;
|
bool parent_locked;
|
||||||
@ -1072,6 +1074,9 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
|||||||
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
if (!IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (trans->journal_replay_not_finished)
|
||||||
|
return;
|
||||||
|
|
||||||
plevel = b->c.level + 1;
|
plevel = b->c.level + 1;
|
||||||
if (!btree_path_node(path, plevel))
|
if (!btree_path_node(path, plevel))
|
||||||
return;
|
return;
|
||||||
@ -1092,7 +1097,7 @@ static void btree_path_verify_new_node(struct btree_trans *trans,
|
|||||||
char buf4[100];
|
char buf4[100];
|
||||||
struct bkey uk = bkey_unpack_key(b, k);
|
struct bkey uk = bkey_unpack_key(b, k);
|
||||||
|
|
||||||
bch2_dump_btree_node(trans->c, l->b);
|
bch2_dump_btree_node(c, l->b);
|
||||||
bch2_bpos_to_text(&PBUF(buf1), path->pos);
|
bch2_bpos_to_text(&PBUF(buf1), path->pos);
|
||||||
bch2_bkey_to_text(&PBUF(buf2), &uk);
|
bch2_bkey_to_text(&PBUF(buf2), &uk);
|
||||||
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
|
bch2_bpos_to_text(&PBUF(buf3), b->data->min_key);
|
||||||
@ -1283,6 +1288,41 @@ static int btree_path_prefetch(struct btree_trans *trans, struct btree_path *pat
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int btree_path_prefetch_j(struct btree_trans *trans, struct btree_path *path,
|
||||||
|
struct btree_and_journal_iter *jiter)
|
||||||
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
struct bkey_buf tmp;
|
||||||
|
unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
|
||||||
|
? (path->level > 1 ? 0 : 2)
|
||||||
|
: (path->level > 1 ? 1 : 16);
|
||||||
|
bool was_locked = btree_node_locked(path, path->level);
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
bch2_bkey_buf_init(&tmp);
|
||||||
|
|
||||||
|
while (nr && !ret) {
|
||||||
|
if (!bch2_btree_node_relock(trans, path, path->level))
|
||||||
|
break;
|
||||||
|
|
||||||
|
bch2_btree_and_journal_iter_advance(jiter);
|
||||||
|
k = bch2_btree_and_journal_iter_peek(jiter);
|
||||||
|
if (!k.k)
|
||||||
|
break;
|
||||||
|
|
||||||
|
bch2_bkey_buf_reassemble(&tmp, c, k);
|
||||||
|
ret = bch2_btree_node_prefetch(c, trans, path, tmp.k, path->btree_id,
|
||||||
|
path->level - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!was_locked)
|
||||||
|
btree_node_unlock(path, path->level);
|
||||||
|
|
||||||
|
bch2_bkey_buf_exit(&tmp, c);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
||||||
struct btree_path *path,
|
struct btree_path *path,
|
||||||
unsigned plevel, struct btree *b)
|
unsigned plevel, struct btree *b)
|
||||||
@ -1305,6 +1345,30 @@ static noinline void btree_node_mem_ptr_set(struct btree_trans *trans,
|
|||||||
btree_node_unlock(path, plevel);
|
btree_node_unlock(path, plevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
|
||||||
|
struct btree_path *path,
|
||||||
|
unsigned flags,
|
||||||
|
struct bkey_buf *out)
|
||||||
|
{
|
||||||
|
struct bch_fs *c = trans->c;
|
||||||
|
struct btree_path_level *l = path_l(path);
|
||||||
|
struct btree_and_journal_iter jiter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
__bch2_btree_and_journal_iter_init_node_iter(&jiter, c, l->b, l->iter, path->pos);
|
||||||
|
|
||||||
|
k = bch2_btree_and_journal_iter_peek(&jiter);
|
||||||
|
|
||||||
|
bch2_bkey_buf_reassemble(out, c, k);
|
||||||
|
|
||||||
|
if (flags & BTREE_ITER_PREFETCH)
|
||||||
|
ret = btree_path_prefetch_j(trans, path, &jiter);
|
||||||
|
|
||||||
|
bch2_btree_and_journal_iter_exit(&jiter);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
static __always_inline int btree_path_down(struct btree_trans *trans,
|
static __always_inline int btree_path_down(struct btree_trans *trans,
|
||||||
struct btree_path *path,
|
struct btree_path *path,
|
||||||
unsigned flags,
|
unsigned flags,
|
||||||
@ -1321,8 +1385,21 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
|||||||
EBUG_ON(!btree_node_locked(path, path->level));
|
EBUG_ON(!btree_node_locked(path, path->level));
|
||||||
|
|
||||||
bch2_bkey_buf_init(&tmp);
|
bch2_bkey_buf_init(&tmp);
|
||||||
bch2_bkey_buf_unpack(&tmp, c, l->b,
|
|
||||||
bch2_btree_node_iter_peek(&l->iter, l->b));
|
if (unlikely(trans->journal_replay_not_finished)) {
|
||||||
|
ret = btree_node_iter_and_journal_peek(trans, path, flags, &tmp);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
} else {
|
||||||
|
bch2_bkey_buf_unpack(&tmp, c, l->b,
|
||||||
|
bch2_btree_node_iter_peek(&l->iter, l->b));
|
||||||
|
|
||||||
|
if (flags & BTREE_ITER_PREFETCH) {
|
||||||
|
ret = btree_path_prefetch(trans, path);
|
||||||
|
if (ret)
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
|
b = bch2_btree_node_get(trans, path, tmp.k, level, lock_type, trace_ip);
|
||||||
ret = PTR_ERR_OR_ZERO(b);
|
ret = PTR_ERR_OR_ZERO(b);
|
||||||
@ -1332,13 +1409,11 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
|
|||||||
mark_btree_node_locked(path, level, lock_type);
|
mark_btree_node_locked(path, level, lock_type);
|
||||||
btree_path_level_init(trans, path, b);
|
btree_path_level_init(trans, path, b);
|
||||||
|
|
||||||
if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
|
if (likely(!trans->journal_replay_not_finished &&
|
||||||
|
tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
|
||||||
unlikely(b != btree_node_mem_ptr(tmp.k)))
|
unlikely(b != btree_node_mem_ptr(tmp.k)))
|
||||||
btree_node_mem_ptr_set(trans, path, level + 1, b);
|
btree_node_mem_ptr_set(trans, path, level + 1, b);
|
||||||
|
|
||||||
if (flags & BTREE_ITER_PREFETCH)
|
|
||||||
ret = btree_path_prefetch(trans, path);
|
|
||||||
|
|
||||||
if (btree_node_read_locked(path, level + 1))
|
if (btree_node_read_locked(path, level + 1))
|
||||||
btree_node_unlock(path, level + 1);
|
btree_node_unlock(path, level + 1);
|
||||||
path->level = level;
|
path->level = level;
|
||||||
@ -2113,6 +2188,55 @@ struct bkey_i *__bch2_btree_trans_peek_updates(struct btree_iter *iter)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct bkey_i *__btree_trans_peek_journal(struct btree_trans *trans,
|
||||||
|
struct btree_path *path)
|
||||||
|
{
|
||||||
|
struct journal_keys *keys = &trans->c->journal_keys;
|
||||||
|
size_t idx = bch2_journal_key_search(keys, path->btree_id,
|
||||||
|
path->level, path->pos);
|
||||||
|
|
||||||
|
while (idx < keys->nr && keys->d[idx].overwritten)
|
||||||
|
idx++;
|
||||||
|
|
||||||
|
return (idx < keys->nr &&
|
||||||
|
keys->d[idx].btree_id == path->btree_id &&
|
||||||
|
keys->d[idx].level == path->level)
|
||||||
|
? keys->d[idx].k
|
||||||
|
: NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline
|
||||||
|
struct bkey_s_c btree_trans_peek_slot_journal(struct btree_trans *trans,
|
||||||
|
struct btree_iter *iter)
|
||||||
|
{
|
||||||
|
struct bkey_i *k = __btree_trans_peek_journal(trans, iter->path);
|
||||||
|
|
||||||
|
if (k && !bpos_cmp(k->k.p, iter->pos)) {
|
||||||
|
iter->k = k->k;
|
||||||
|
return bkey_i_to_s_c(k);
|
||||||
|
} else {
|
||||||
|
return bkey_s_c_null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static noinline
|
||||||
|
struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
|
||||||
|
struct btree_iter *iter,
|
||||||
|
struct bkey_s_c k)
|
||||||
|
{
|
||||||
|
struct bkey_i *next_journal =
|
||||||
|
__btree_trans_peek_journal(trans, iter->path);
|
||||||
|
|
||||||
|
if (next_journal &&
|
||||||
|
bpos_cmp(next_journal->k.p,
|
||||||
|
k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
|
||||||
|
iter->k = next_journal->k;
|
||||||
|
k = bkey_i_to_s_c(next_journal);
|
||||||
|
}
|
||||||
|
|
||||||
|
return k;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
|
* bch2_btree_iter_peek: returns first key greater than or equal to iterator's
|
||||||
* current position
|
* current position
|
||||||
@ -2141,16 +2265,12 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
next_update = btree_trans_peek_updates(iter);
|
|
||||||
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
|
||||||
|
|
||||||
/* * In the btree, deleted keys sort before non deleted: */
|
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
|
||||||
if (k.k && bkey_deleted(k.k) &&
|
k = btree_trans_peek_journal(trans, iter, k);
|
||||||
(!next_update ||
|
|
||||||
bpos_cmp(k.k->p, next_update->k.p) <= 0)) {
|
next_update = btree_trans_peek_updates(iter);
|
||||||
search_key = k.k->p;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (next_update &&
|
if (next_update &&
|
||||||
bpos_cmp(next_update->k.p,
|
bpos_cmp(next_update->k.p,
|
||||||
@ -2159,6 +2279,20 @@ struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
|
|||||||
k = bkey_i_to_s_c(next_update);
|
k = bkey_i_to_s_c(next_update);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (k.k && bkey_deleted(k.k)) {
|
||||||
|
/*
|
||||||
|
* If we've got a whiteout, and it's after the search
|
||||||
|
* key, advance the search key to the whiteout instead
|
||||||
|
* of just after the whiteout - it might be a btree
|
||||||
|
* whiteout, with a real key at the same position, since
|
||||||
|
* in the btree deleted keys sort before non deleted.
|
||||||
|
*/
|
||||||
|
search_key = bpos_cmp(search_key, k.k->p)
|
||||||
|
? k.k->p
|
||||||
|
: bpos_successor(k.k->p);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (likely(k.k)) {
|
if (likely(k.k)) {
|
||||||
/*
|
/*
|
||||||
* We can never have a key in a leaf node at POS_MAX, so
|
* We can never have a key in a leaf node at POS_MAX, so
|
||||||
@ -2249,6 +2383,10 @@ struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *iter)
|
|||||||
|
|
||||||
EBUG_ON(iter->path->cached || iter->path->level);
|
EBUG_ON(iter->path->cached || iter->path->level);
|
||||||
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
|
EBUG_ON(iter->flags & BTREE_ITER_WITH_UPDATES);
|
||||||
|
|
||||||
|
if (iter->flags & BTREE_ITER_WITH_JOURNAL)
|
||||||
|
return bkey_s_c_err(-EIO);
|
||||||
|
|
||||||
bch2_btree_iter_verify(iter);
|
bch2_btree_iter_verify(iter);
|
||||||
bch2_btree_iter_verify_entry_exit(iter);
|
bch2_btree_iter_verify_entry_exit(iter);
|
||||||
|
|
||||||
@ -2395,23 +2533,18 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
|||||||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
|
||||||
struct bkey_i *next_update;
|
struct bkey_i *next_update;
|
||||||
|
|
||||||
next_update = btree_trans_peek_updates(iter);
|
if ((next_update = btree_trans_peek_updates(iter)) &&
|
||||||
if (next_update &&
|
|
||||||
!bpos_cmp(next_update->k.p, iter->pos)) {
|
!bpos_cmp(next_update->k.p, iter->pos)) {
|
||||||
iter->k = next_update->k;
|
iter->k = next_update->k;
|
||||||
k = bkey_i_to_s_c(next_update);
|
k = bkey_i_to_s_c(next_update);
|
||||||
} else {
|
goto out;
|
||||||
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!k.k ||
|
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL) &&
|
||||||
((iter->flags & BTREE_ITER_ALL_SNAPSHOTS)
|
(k = btree_trans_peek_slot_journal(trans, iter)).k)
|
||||||
? bpos_cmp(iter->pos, k.k->p)
|
goto out;
|
||||||
: bkey_cmp(iter->pos, k.k->p))) {
|
|
||||||
bkey_init(&iter->k);
|
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
|
||||||
iter->k.p = iter->pos;
|
|
||||||
k = (struct bkey_s_c) { &iter->k, NULL };
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
struct bpos next;
|
struct bpos next;
|
||||||
|
|
||||||
@ -2455,7 +2588,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
|
|||||||
k = (struct bkey_s_c) { &iter->k, NULL };
|
k = (struct bkey_s_c) { &iter->k, NULL };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
iter->path->should_be_locked = true;
|
iter->path->should_be_locked = true;
|
||||||
|
|
||||||
bch2_btree_iter_verify_entry_exit(iter);
|
bch2_btree_iter_verify_entry_exit(iter);
|
||||||
@ -2635,6 +2768,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
|
|||||||
btree_type_has_snapshots(btree_id))
|
btree_type_has_snapshots(btree_id))
|
||||||
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
|
flags |= BTREE_ITER_FILTER_SNAPSHOTS;
|
||||||
|
|
||||||
|
if (trans->journal_replay_not_finished)
|
||||||
|
flags |= BTREE_ITER_WITH_JOURNAL;
|
||||||
|
|
||||||
iter->trans = trans;
|
iter->trans = trans;
|
||||||
iter->path = NULL;
|
iter->path = NULL;
|
||||||
iter->btree_id = btree_id;
|
iter->btree_id = btree_id;
|
||||||
@ -2801,6 +2937,8 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
|
|||||||
memset(trans, 0, sizeof(*trans));
|
memset(trans, 0, sizeof(*trans));
|
||||||
trans->c = c;
|
trans->c = c;
|
||||||
trans->ip = _RET_IP_;
|
trans->ip = _RET_IP_;
|
||||||
|
trans->journal_replay_not_finished =
|
||||||
|
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
|
||||||
|
|
||||||
bch2_trans_alloc_paths(trans, c);
|
bch2_trans_alloc_paths(trans, c);
|
||||||
|
|
||||||
|
@ -207,10 +207,11 @@ struct btree_node_iter {
|
|||||||
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
|
#define BTREE_ITER_CACHED_NOFILL (1 << 8)
|
||||||
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
|
#define BTREE_ITER_CACHED_NOCREATE (1 << 9)
|
||||||
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
#define BTREE_ITER_WITH_UPDATES (1 << 10)
|
||||||
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 11)
|
#define BTREE_ITER_WITH_JOURNAL (1 << 11)
|
||||||
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
#define __BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
|
||||||
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 13)
|
#define BTREE_ITER_ALL_SNAPSHOTS (1 << 13)
|
||||||
#define BTREE_ITER_NOPRESERVE (1 << 14)
|
#define BTREE_ITER_FILTER_SNAPSHOTS (1 << 14)
|
||||||
|
#define BTREE_ITER_NOPRESERVE (1 << 15)
|
||||||
|
|
||||||
enum btree_path_uptodate {
|
enum btree_path_uptodate {
|
||||||
BTREE_ITER_UPTODATE = 0,
|
BTREE_ITER_UPTODATE = 0,
|
||||||
@ -381,6 +382,7 @@ struct btree_trans {
|
|||||||
bool restarted:1;
|
bool restarted:1;
|
||||||
bool paths_sorted:1;
|
bool paths_sorted:1;
|
||||||
bool journal_transaction_names:1;
|
bool journal_transaction_names:1;
|
||||||
|
bool journal_replay_not_finished:1;
|
||||||
/*
|
/*
|
||||||
* For when bch2_trans_update notices we'll be splitting a compressed
|
* For when bch2_trans_update notices we'll be splitting a compressed
|
||||||
* extent:
|
* extent:
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
#include "journal.h"
|
#include "journal.h"
|
||||||
#include "journal_reclaim.h"
|
#include "journal_reclaim.h"
|
||||||
#include "keylist.h"
|
#include "keylist.h"
|
||||||
|
#include "recovery.h"
|
||||||
#include "replicas.h"
|
#include "replicas.h"
|
||||||
#include "super-io.h"
|
#include "super-io.h"
|
||||||
#include "trace.h"
|
#include "trace.h"
|
||||||
@ -1146,6 +1147,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
|
|||||||
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
|
BUG_ON(insert->k.type == KEY_TYPE_btree_ptr_v2 &&
|
||||||
!btree_ptr_sectors_written(insert));
|
!btree_ptr_sectors_written(insert));
|
||||||
|
|
||||||
|
if (unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
|
||||||
|
bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p);
|
||||||
|
|
||||||
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
|
invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), btree_node_type(b)) ?:
|
||||||
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
|
bch2_bkey_in_btree_node(b, bkey_i_to_s_c(insert));
|
||||||
if (invalid) {
|
if (invalid) {
|
||||||
|
@ -711,7 +711,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
|
|||||||
|
|
||||||
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
|
ret = bch2_trans_commit_write_locked(trans, stopped_at, trace_ip);
|
||||||
|
|
||||||
if (!ret && unlikely(!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags)))
|
if (!ret && unlikely(trans->journal_replay_not_finished))
|
||||||
bch2_drop_overwrites_from_journal(trans);
|
bch2_drop_overwrites_from_journal(trans);
|
||||||
|
|
||||||
trans_for_each_update(trans, i)
|
trans_for_each_update(trans, i)
|
||||||
|
@ -1558,50 +1558,48 @@ void bch2_stripes_heap_start(struct bch_fs *c)
|
|||||||
bch2_stripes_heap_insert(c, m, iter.pos);
|
bch2_stripes_heap_insert(c, m, iter.pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k)
|
|
||||||
{
|
|
||||||
const struct bch_stripe *s;
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct stripe *m;
|
|
||||||
unsigned i;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (k.k->type != KEY_TYPE_stripe)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
s = bkey_s_c_to_stripe(k).v;
|
|
||||||
|
|
||||||
m = genradix_ptr(&c->stripes, k.k->p.offset);
|
|
||||||
m->alive = true;
|
|
||||||
m->sectors = le16_to_cpu(s->sectors);
|
|
||||||
m->algorithm = s->algorithm;
|
|
||||||
m->nr_blocks = s->nr_blocks;
|
|
||||||
m->nr_redundant = s->nr_redundant;
|
|
||||||
m->blocks_nonempty = 0;
|
|
||||||
|
|
||||||
for (i = 0; i < s->nr_blocks; i++)
|
|
||||||
m->blocks_nonempty += !!stripe_blockcount_get(s, i);
|
|
||||||
|
|
||||||
spin_lock(&c->ec_stripes_heap_lock);
|
|
||||||
bch2_stripes_heap_update(c, m, k.k->p.offset);
|
|
||||||
spin_unlock(&c->ec_stripes_heap_lock);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bch2_stripes_read(struct bch_fs *c)
|
int bch2_stripes_read(struct bch_fs *c)
|
||||||
{
|
{
|
||||||
struct btree_trans trans;
|
struct btree_trans trans;
|
||||||
|
struct btree_iter iter;
|
||||||
|
struct bkey_s_c k;
|
||||||
|
const struct bch_stripe *s;
|
||||||
|
struct stripe *m;
|
||||||
|
unsigned i;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
bch2_trans_init(&trans, c, 0, 0);
|
bch2_trans_init(&trans, c, 0, 0);
|
||||||
ret = bch2_btree_and_journal_walk(&trans, BTREE_ID_stripes,
|
|
||||||
bch2_stripes_read_fn);
|
for_each_btree_key(&trans, iter, BTREE_ID_stripes, POS_MIN,
|
||||||
|
BTREE_ITER_PREFETCH, k, ret) {
|
||||||
|
if (k.k->type != KEY_TYPE_stripe)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL);
|
||||||
|
if (ret)
|
||||||
|
break;
|
||||||
|
|
||||||
|
s = bkey_s_c_to_stripe(k).v;
|
||||||
|
|
||||||
|
m = genradix_ptr(&c->stripes, k.k->p.offset);
|
||||||
|
m->alive = true;
|
||||||
|
m->sectors = le16_to_cpu(s->sectors);
|
||||||
|
m->algorithm = s->algorithm;
|
||||||
|
m->nr_blocks = s->nr_blocks;
|
||||||
|
m->nr_redundant = s->nr_redundant;
|
||||||
|
m->blocks_nonempty = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < s->nr_blocks; i++)
|
||||||
|
m->blocks_nonempty += !!stripe_blockcount_get(s, i);
|
||||||
|
|
||||||
|
spin_lock(&c->ec_stripes_heap_lock);
|
||||||
|
bch2_stripes_heap_update(c, m, k.k->p.offset);
|
||||||
|
spin_unlock(&c->ec_stripes_heap_lock);
|
||||||
|
}
|
||||||
|
bch2_trans_iter_exit(&trans, &iter);
|
||||||
|
|
||||||
bch2_trans_exit(&trans);
|
bch2_trans_exit(&trans);
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
bch_err(c, "error reading stripes: %i", ret);
|
bch_err(c, "error reading stripes: %i", ret);
|
||||||
|
|
||||||
|
@ -59,23 +59,21 @@ static void zero_out_btree_mem_ptr(struct journal_keys *keys)
|
|||||||
static int __journal_key_cmp(enum btree_id l_btree_id,
|
static int __journal_key_cmp(enum btree_id l_btree_id,
|
||||||
unsigned l_level,
|
unsigned l_level,
|
||||||
struct bpos l_pos,
|
struct bpos l_pos,
|
||||||
struct journal_key *r)
|
const struct journal_key *r)
|
||||||
{
|
{
|
||||||
return (cmp_int(l_btree_id, r->btree_id) ?:
|
return (cmp_int(l_btree_id, r->btree_id) ?:
|
||||||
cmp_int(l_level, r->level) ?:
|
cmp_int(l_level, r->level) ?:
|
||||||
bpos_cmp(l_pos, r->k->k.p));
|
bpos_cmp(l_pos, r->k->k.p));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int journal_key_cmp(struct journal_key *l, struct journal_key *r)
|
static int journal_key_cmp(const struct journal_key *l, const struct journal_key *r)
|
||||||
{
|
{
|
||||||
return (cmp_int(l->btree_id, r->btree_id) ?:
|
return __journal_key_cmp(l->btree_id, l->level, l->k->k.p, r);
|
||||||
cmp_int(l->level, r->level) ?:
|
|
||||||
bpos_cmp(l->k->k.p, r->k->k.p));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t journal_key_search(struct journal_keys *journal_keys,
|
size_t bch2_journal_key_search(struct journal_keys *journal_keys,
|
||||||
enum btree_id id, unsigned level,
|
enum btree_id id, unsigned level,
|
||||||
struct bpos pos)
|
struct bpos pos)
|
||||||
{
|
{
|
||||||
size_t l = 0, r = journal_keys->nr, m;
|
size_t l = 0, r = journal_keys->nr, m;
|
||||||
|
|
||||||
@ -125,7 +123,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
|||||||
};
|
};
|
||||||
struct journal_keys *keys = &c->journal_keys;
|
struct journal_keys *keys = &c->journal_keys;
|
||||||
struct journal_iter *iter;
|
struct journal_iter *iter;
|
||||||
unsigned idx = journal_key_search(keys, id, level, k->k.p);
|
size_t idx = bch2_journal_key_search(keys, id, level, k->k.p);
|
||||||
|
|
||||||
BUG_ON(test_bit(BCH_FS_RW, &c->flags));
|
BUG_ON(test_bit(BCH_FS_RW, &c->flags));
|
||||||
|
|
||||||
@ -164,6 +162,11 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Can only be used from the recovery thread while we're still RO - can't be
|
||||||
|
* used once we've got RW, as journal_keys is at that point used by multiple
|
||||||
|
* threads:
|
||||||
|
*/
|
||||||
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
|
int bch2_journal_key_insert(struct bch_fs *c, enum btree_id id,
|
||||||
unsigned level, struct bkey_i *k)
|
unsigned level, struct bkey_i *k)
|
||||||
{
|
{
|
||||||
@ -196,7 +199,7 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
|||||||
unsigned level, struct bpos pos)
|
unsigned level, struct bpos pos)
|
||||||
{
|
{
|
||||||
struct journal_keys *keys = &c->journal_keys;
|
struct journal_keys *keys = &c->journal_keys;
|
||||||
size_t idx = journal_key_search(keys, btree, level, pos);
|
size_t idx = bch2_journal_key_search(keys, btree, level, pos);
|
||||||
|
|
||||||
if (idx < keys->nr &&
|
if (idx < keys->nr &&
|
||||||
keys->d[idx].btree_id == btree &&
|
keys->d[idx].btree_id == btree &&
|
||||||
@ -207,15 +210,18 @@ void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
|
|||||||
|
|
||||||
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
|
static struct bkey_i *bch2_journal_iter_peek(struct journal_iter *iter)
|
||||||
{
|
{
|
||||||
struct journal_key *k = iter->idx - iter->keys->nr
|
struct journal_key *k = iter->keys->d + iter->idx;
|
||||||
? iter->keys->d + iter->idx : NULL;
|
|
||||||
|
|
||||||
if (k &&
|
while (k < iter->keys->d + iter->keys->nr &&
|
||||||
k->btree_id == iter->btree_id &&
|
k->btree_id == iter->btree_id &&
|
||||||
k->level == iter->level)
|
k->level == iter->level) {
|
||||||
return k->k;
|
if (!k->overwritten)
|
||||||
|
return k->k;
|
||||||
|
|
||||||
|
iter->idx++;
|
||||||
|
k = iter->keys->d + iter->idx;
|
||||||
|
}
|
||||||
|
|
||||||
iter->idx = iter->keys->nr;
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -238,8 +244,7 @@ static void bch2_journal_iter_init(struct bch_fs *c,
|
|||||||
iter->btree_id = id;
|
iter->btree_id = id;
|
||||||
iter->level = level;
|
iter->level = level;
|
||||||
iter->keys = &c->journal_keys;
|
iter->keys = &c->journal_keys;
|
||||||
iter->idx = journal_key_search(&c->journal_keys, id, level, pos);
|
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
|
||||||
list_add(&iter->list, &c->journal_iters);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
|
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
|
||||||
@ -325,106 +330,33 @@ void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *iter)
|
|||||||
bch2_journal_iter_exit(&iter->journal);
|
bch2_journal_iter_exit(&iter->journal);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
||||||
struct bch_fs *c,
|
struct bch_fs *c,
|
||||||
struct btree *b)
|
struct btree *b,
|
||||||
|
struct btree_node_iter node_iter,
|
||||||
|
struct bpos pos)
|
||||||
{
|
{
|
||||||
memset(iter, 0, sizeof(*iter));
|
memset(iter, 0, sizeof(*iter));
|
||||||
|
|
||||||
iter->b = b;
|
iter->b = b;
|
||||||
bch2_btree_node_iter_init_from_start(&iter->node_iter, iter->b);
|
iter->node_iter = node_iter;
|
||||||
bch2_journal_iter_init(c, &iter->journal,
|
bch2_journal_iter_init(c, &iter->journal, b->c.btree_id, b->c.level, pos);
|
||||||
b->c.btree_id, b->c.level, b->data->min_key);
|
INIT_LIST_HEAD(&iter->journal.list);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Walk btree, overlaying keys from the journal: */
|
/*
|
||||||
|
* this version is used by btree_gc before filesystem has gone RW and
|
||||||
static void btree_and_journal_iter_prefetch(struct bch_fs *c, struct btree *b,
|
* multithreaded, so uses the journal_iters list:
|
||||||
struct btree_and_journal_iter iter)
|
*/
|
||||||
|
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *iter,
|
||||||
|
struct bch_fs *c,
|
||||||
|
struct btree *b)
|
||||||
{
|
{
|
||||||
unsigned i = 0, nr = b->c.level > 1 ? 2 : 16;
|
struct btree_node_iter node_iter;
|
||||||
struct bkey_s_c k;
|
|
||||||
struct bkey_buf tmp;
|
|
||||||
|
|
||||||
BUG_ON(!b->c.level);
|
bch2_btree_node_iter_init_from_start(&node_iter, b);
|
||||||
|
__bch2_btree_and_journal_iter_init_node_iter(iter, c, b, node_iter, b->data->min_key);
|
||||||
bch2_bkey_buf_init(&tmp);
|
list_add(&iter->journal.list, &c->journal_iters);
|
||||||
|
|
||||||
while (i < nr &&
|
|
||||||
(k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
|
||||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
|
||||||
|
|
||||||
bch2_btree_node_prefetch(c, NULL, NULL, tmp.k,
|
|
||||||
b->c.btree_id, b->c.level - 1);
|
|
||||||
|
|
||||||
bch2_btree_and_journal_iter_advance(&iter);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
bch2_bkey_buf_exit(&tmp, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int bch2_btree_and_journal_walk_recurse(struct btree_trans *trans, struct btree *b,
|
|
||||||
enum btree_id btree_id,
|
|
||||||
btree_walk_key_fn key_fn)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct btree_and_journal_iter iter;
|
|
||||||
struct bkey_s_c k;
|
|
||||||
struct bkey_buf tmp;
|
|
||||||
struct btree *child;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
bch2_bkey_buf_init(&tmp);
|
|
||||||
bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
|
|
||||||
|
|
||||||
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
|
|
||||||
if (b->c.level) {
|
|
||||||
bch2_bkey_buf_reassemble(&tmp, c, k);
|
|
||||||
|
|
||||||
child = bch2_btree_node_get_noiter(c, tmp.k,
|
|
||||||
b->c.btree_id, b->c.level - 1,
|
|
||||||
false);
|
|
||||||
|
|
||||||
ret = PTR_ERR_OR_ZERO(child);
|
|
||||||
if (ret)
|
|
||||||
break;
|
|
||||||
|
|
||||||
btree_and_journal_iter_prefetch(c, b, iter);
|
|
||||||
|
|
||||||
ret = bch2_btree_and_journal_walk_recurse(trans, child,
|
|
||||||
btree_id, key_fn);
|
|
||||||
six_unlock_read(&child->c.lock);
|
|
||||||
} else {
|
|
||||||
ret = key_fn(trans, k);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ret)
|
|
||||||
break;
|
|
||||||
|
|
||||||
bch2_btree_and_journal_iter_advance(&iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
bch2_btree_and_journal_iter_exit(&iter);
|
|
||||||
bch2_bkey_buf_exit(&tmp, c);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bch2_btree_and_journal_walk(struct btree_trans *trans, enum btree_id btree_id,
|
|
||||||
btree_walk_key_fn key_fn)
|
|
||||||
{
|
|
||||||
struct bch_fs *c = trans->c;
|
|
||||||
struct btree *b = c->btree_roots[btree_id].b;
|
|
||||||
int ret = 0;
|
|
||||||
|
|
||||||
if (btree_node_fake(b))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
six_lock_read(&b->c.lock, NULL, NULL);
|
|
||||||
ret = bch2_btree_and_journal_walk_recurse(trans, b, btree_id, key_fn);
|
|
||||||
six_unlock_read(&b->c.lock);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* sort and dedup all keys in the journal: */
|
/* sort and dedup all keys in the journal: */
|
||||||
@ -449,9 +381,7 @@ static int journal_sort_key_cmp(const void *_l, const void *_r)
|
|||||||
const struct journal_key *l = _l;
|
const struct journal_key *l = _l;
|
||||||
const struct journal_key *r = _r;
|
const struct journal_key *r = _r;
|
||||||
|
|
||||||
return cmp_int(l->btree_id, r->btree_id) ?:
|
return journal_key_cmp(l, r) ?:
|
||||||
cmp_int(l->level, r->level) ?:
|
|
||||||
bpos_cmp(l->k->k.p, r->k->k.p) ?:
|
|
||||||
cmp_int(l->journal_seq, r->journal_seq) ?:
|
cmp_int(l->journal_seq, r->journal_seq) ?:
|
||||||
cmp_int(l->journal_offset, r->journal_offset);
|
cmp_int(l->journal_offset, r->journal_offset);
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,9 @@ struct btree_and_journal_iter {
|
|||||||
} last;
|
} last;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
size_t bch2_journal_key_search(struct journal_keys *, enum btree_id,
|
||||||
|
unsigned, struct bpos);
|
||||||
|
|
||||||
int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
|
int bch2_journal_key_insert_take(struct bch_fs *, enum btree_id,
|
||||||
unsigned, struct bkey_i *);
|
unsigned, struct bkey_i *);
|
||||||
int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
|
int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
|
||||||
@ -45,14 +48,13 @@ struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *
|
|||||||
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
|
struct bkey_s_c bch2_btree_and_journal_iter_next(struct btree_and_journal_iter *);
|
||||||
|
|
||||||
void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
|
void bch2_btree_and_journal_iter_exit(struct btree_and_journal_iter *);
|
||||||
|
void __bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
||||||
|
struct bch_fs *, struct btree *,
|
||||||
|
struct btree_node_iter, struct bpos);
|
||||||
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
void bch2_btree_and_journal_iter_init_node_iter(struct btree_and_journal_iter *,
|
||||||
struct bch_fs *,
|
struct bch_fs *,
|
||||||
struct btree *);
|
struct btree *);
|
||||||
|
|
||||||
typedef int (*btree_walk_key_fn)(struct btree_trans *, struct bkey_s_c);
|
|
||||||
|
|
||||||
int bch2_btree_and_journal_walk(struct btree_trans *, enum btree_id, btree_walk_key_fn);
|
|
||||||
|
|
||||||
void bch2_journal_keys_free(struct journal_keys *);
|
void bch2_journal_keys_free(struct journal_keys *);
|
||||||
void bch2_journal_entries_free(struct list_head *);
|
void bch2_journal_entries_free(struct list_head *);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user