bcachefs: Improve bset compaction

The previous patch, which fixed btree nodes being written too
aggressively, meant that we were no longer sorting btree node bsets
optimally - this patch fixes that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
commit 2177147b39 (parent 241e26369e)
Author: Kent Overstreet, 2021-04-06 15:33:19 -04:00 (committed by Kent Overstreet)
4 changed files with 39 additions and 21 deletions
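
Note: the core of this change is the new heuristic in bch2_btree_init_next()
(btree_io.c below). When a node already has MAX_BSETS bsets, the node is now
written out with the write lock held - rather than only compacted in memory -
whenever the middle bset is large relative to its neighbours on a log scale,
the idea being that merge-sorting a large, already-sorted bset for little gain
is wasted work, while a write resets the node to a single sorted bset. A
minimal standalone sketch of that decision (should_write_node(), ilog2_u() and
nr_u64s[] are illustrative stand-ins, not bcachefs code):

    #include <stdbool.h>

    #define MAX_BSETS 3U

    /* stand-in for the kernel's ilog2(): floor of log base 2 */
    static unsigned ilog2_u(unsigned v)
    {
            unsigned r = 0;

            while (v >>= 1)
                    r++;
            return r;
    }

    /*
     * Decide whether a full node should be written out (true) or have its
     * bsets sorted together in memory (false); nr_u64s[i] is the size of
     * bset i in u64s, as bset_u64s() would report it:
     */
    static bool should_write_node(const unsigned nr_u64s[MAX_BSETS])
    {
            unsigned log_u64s[MAX_BSETS];
            unsigned i;

            for (i = 0; i < MAX_BSETS; i++)
                    log_u64s[i] = ilog2_u(nr_u64s[i]);

            return log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2;
    }

For example, bsets of 512, 128 and 16 u64s have log sizes 9, 7 and 4: since
7 >= (9 + 4) / 2 = 6, the node gets written out rather than compacted, and
the iterator is reinitialized.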

fs/bcachefs/btree_cache.c

@@ -215,7 +215,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 		if (bch2_verify_btree_ondisk)
 			bch2_btree_node_write(c, b, SIX_LOCK_intent);
 		else
-			__bch2_btree_node_write(c, b, SIX_LOCK_read);
+			__bch2_btree_node_write(c, b);
 
 		/* wait for any in flight btree write */
 		btree_node_wait_on_io(b);

fs/bcachefs/btree_io.c

@@ -241,7 +241,6 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 }
 
 static void btree_node_sort(struct bch_fs *c, struct btree *b,
-			    struct btree_iter *iter,
 			    unsigned start_idx,
 			    unsigned end_idx,
 			    bool filter_whiteouts)
@@ -377,8 +376,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
  * We're about to add another bset to the btree node, so if there's currently
  * too many bsets - sort some of them together:
  */
-static bool btree_node_compact(struct bch_fs *c, struct btree *b,
-			       struct btree_iter *iter)
+static bool btree_node_compact(struct bch_fs *c, struct btree *b)
 {
 	unsigned unwritten_idx;
 	bool ret = false;
@@ -390,13 +388,13 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b,
 			break;
 
 	if (b->nsets - unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, unwritten_idx,
+		btree_node_sort(c, b, unwritten_idx,
 				b->nsets, false);
 		ret = true;
 	}
 
 	if (unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, 0, unwritten_idx, false);
+		btree_node_sort(c, b, 0, unwritten_idx, false);
 		ret = true;
 	}
@@ -426,12 +424,30 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 			  struct btree_iter *iter)
 {
 	struct btree_node_entry *bne;
-	bool did_sort;
+	bool reinit_iter = false;
 
 	EBUG_ON(!(b->c.lock.state.seq & 1));
 	EBUG_ON(iter && iter->l[b->c.level].b != b);
+	BUG_ON(bset_written(b, bset(b, &b->set[1])));
 
-	did_sort = btree_node_compact(c, b, iter);
+	if (b->nsets == MAX_BSETS) {
+		unsigned log_u64s[] = {
+			ilog2(bset_u64s(&b->set[0])),
+			ilog2(bset_u64s(&b->set[1])),
+			ilog2(bset_u64s(&b->set[2])),
+		};
+
+		if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
+			bch2_btree_node_write(c, b, SIX_LOCK_write);
+			reinit_iter = true;
+		}
+	}
+
+	if (b->nsets == MAX_BSETS &&
+	    btree_node_compact(c, b))
+		reinit_iter = true;
+
+	BUG_ON(b->nsets >= MAX_BSETS);
 
 	bne = want_new_bset(c, b);
 	if (bne)
@@ -439,7 +455,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 
 	bch2_btree_build_aux_trees(b);
 
-	if (iter && did_sort)
+	if (iter && reinit_iter)
 		bch2_btree_iter_reinit_node(iter, b);
 }
 
@@ -1324,8 +1340,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 	return ret;
 }
 
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			     enum six_lock_type lock_type_held)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 {
 	struct btree_write_bio *wbio;
 	struct bset_tree *t;
@@ -1596,7 +1611,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 	 * single bset:
 	 */
 	if (b->nsets > 1) {
-		btree_node_sort(c, b, NULL, 0, b->nsets, true);
+		btree_node_sort(c, b, 0, b->nsets, true);
 		invalidated_iter = true;
 	} else {
 		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
@@ -1626,13 +1641,12 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
  * Use this one if the node is intent locked:
  */
 void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			  enum six_lock_type lock_type_held)
+			   enum six_lock_type lock_type_held)
 {
-	BUG_ON(lock_type_held == SIX_LOCK_write);
-
 	if (lock_type_held == SIX_LOCK_intent ||
-	    six_lock_tryupgrade(&b->c.lock)) {
-		__bch2_btree_node_write(c, b, SIX_LOCK_intent);
+	    (lock_type_held == SIX_LOCK_read &&
+	     six_lock_tryupgrade(&b->c.lock))) {
+		__bch2_btree_node_write(c, b);
 
 		/* don't cycle lock unnecessarily: */
 		if (btree_node_just_written(b) &&
@@ -1644,7 +1658,10 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		if (lock_type_held == SIX_LOCK_read)
 			six_lock_downgrade(&b->c.lock);
 	} else {
-		__bch2_btree_node_write(c, b, SIX_LOCK_read);
+		__bch2_btree_node_write(c, b);
+		if (lock_type_held == SIX_LOCK_write &&
+		    btree_node_just_written(b))
+			bch2_btree_post_write_cleanup(c, b);
 	}
 }
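
The net effect on the write-path API (a summary of the diff above, not part of
it): __bch2_btree_node_write() no longer cares how the node is locked, and
bch2_btree_node_write() now accepts all three lock states, where previously
SIX_LOCK_write hit a BUG_ON(). A caller-side sketch, assuming a struct bch_fs
*c and a suitably locked struct btree *b are in scope:

    /*
     * Caller holds an intent lock: post-write cleanup runs under a
     * trylocked write lock, as before:
     */
    bch2_btree_node_write(c, b, SIX_LOCK_intent);

    /*
     * Caller holds a read lock: try to upgrade to intent for cleanup and
     * downgrade afterwards; if the upgrade fails, just issue the write:
     */
    bch2_btree_node_write(c, b, SIX_LOCK_read);

    /*
     * Caller already holds the write lock (new in this patch - used by
     * bch2_btree_init_next()): cleanup runs directly, no lock cycling:
     */
    bch2_btree_node_write(c, b, SIX_LOCK_write);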

fs/bcachefs/btree_io.h

@@ -144,8 +144,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
 			       struct btree_write *);
 void bch2_btree_write_error_work(struct work_struct *);
 
-void __bch2_btree_node_write(struct bch_fs *, struct btree *,
-			     enum six_lock_type);
+void __bch2_btree_node_write(struct bch_fs *, struct btree *);
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 void bch2_btree_node_write(struct bch_fs *, struct btree *,

fs/bcachefs/btree_update_interior.h

@@ -256,13 +256,15 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
 	return remaining;
 }
 
+#define BTREE_WRITE_SET_U64s_BITS	9
+
 static inline unsigned btree_write_set_buffer(struct btree *b)
 {
 	/*
 	 * Could buffer up larger amounts of keys for btrees with larger keys,
 	 * pending benchmarking:
 	 */
-	return 4 << 10;
+	return 8 << BTREE_WRITE_SET_U64s_BITS;
 }
 
 static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
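
Aside on this last hunk: the returned buffer size is unchanged - 8 << 9 ==
4096 == 4 << 10 - the new form just spells it as sizeof(u64) (8 bytes) times
2^BTREE_WRITE_SET_U64s_BITS (512) u64s, making the u64 count reusable as a
named constant. A compile-time check of that equivalence (standalone C11
sketch, not bcachefs code):

    #include <stdint.h>

    #define BTREE_WRITE_SET_U64s_BITS 9

    /* 512 u64s of 8 bytes each is exactly the old hardcoded 4KiB: */
    _Static_assert((sizeof(uint64_t) << BTREE_WRITE_SET_U64s_BITS) == (4 << 10),
                   "btree write set buffer size changed");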