From a34eef6dd179463e70a97bbf8453b7ca21d1e666 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 20 Oct 2024 20:02:09 -0400 Subject: [PATCH] bcachefs: Don't keep tons of cached pointers around We had a bug report where the data update path was creating an extent that failed to validate because it had too many pointers; almost all of them were cached. To fix this, we have: - want_cached_ptr(), a new helper that checks if we even want a cached pointer (is on appropriate target, device is readable). - bch2_extent_set_ptr_cached() now only sets a pointer cached if we want it. - bch2_extent_normalize_by_opts() now ensures that we only have a single cached pointer that we want. While working on this, it was noticed that this doesn't work well with reflinked data and per-file options. Another patch series is coming that plumbs through additional io path options through bch_extent_rebalance, with improved option handling. Reported-by: Reed Riley Signed-off-by: Kent Overstreet --- fs/bcachefs/data_update.c | 21 ++++++---- fs/bcachefs/data_update.h | 3 +- fs/bcachefs/extents.c | 88 +++++++++++++++++++++++++++++++-------- fs/bcachefs/extents.h | 5 ++- fs/bcachefs/move.c | 2 +- 5 files changed, 90 insertions(+), 29 deletions(-) diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index a6ee0beee6b0..8e75a852b358 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -236,7 +236,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, if (((1U << i) & m->data_opts.rewrite_ptrs) && (ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) && !ptr->cached) { - bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr); + bch2_extent_ptr_set_cached(c, &m->op.opts, + bkey_i_to_s(insert), ptr); rewrites_found |= 1U << i; } i++; @@ -284,7 +285,8 @@ restart_drop_extra_replicas: durability - ptr_durability >= m->op.opts.data_replicas) { durability -= ptr_durability; - bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr); + bch2_extent_ptr_set_cached(c, &m->op.opts, + bkey_i_to_s(insert), &entry->ptr); goto restart_drop_extra_replicas; } } @@ -295,7 +297,7 @@ restart_drop_extra_replicas: bch2_extent_ptr_decoded_append(insert, &p); bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 }); - bch2_extent_normalize(c, bkey_i_to_s(insert)); + bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert)); ret = bch2_sum_sector_overwrites(trans, &iter, insert, &should_check_enospc, @@ -558,7 +560,8 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m) int bch2_extent_drop_ptrs(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, - struct data_update_opts data_opts) + struct bch_io_opts *io_opts, + struct data_update_opts *data_opts) { struct bch_fs *c = trans->c; struct bkey_i *n; @@ -569,11 +572,11 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, if (ret) return ret; - while (data_opts.kill_ptrs) { - unsigned i = 0, drop = __fls(data_opts.kill_ptrs); + while (data_opts->kill_ptrs) { + unsigned i = 0, drop = __fls(data_opts->kill_ptrs); bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, i++ == drop); - data_opts.kill_ptrs ^= 1U << drop; + data_opts->kill_ptrs ^= 1U << drop; } /* @@ -581,7 +584,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans, * will do the appropriate thing with it (turning it into a * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, bkey_i_to_s(n)); + bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n)); /* * Since we're not inserting through an extent iterator @@ -720,7 +723,7 @@ int bch2_data_update_init(struct btree_trans *trans, m->data_opts.rewrite_ptrs = 0; /* if iter == NULL, it's just a promote */ if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); + ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts); goto out; } diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h index 8d36365bdea8..e4b50723428e 100644 --- a/fs/bcachefs/data_update.h +++ b/fs/bcachefs/data_update.h @@ -40,7 +40,8 @@ void bch2_data_update_read_done(struct data_update *, int bch2_extent_drop_ptrs(struct btree_trans *, struct btree_iter *, struct bkey_s_c, - struct data_update_opts); + struct bch_io_opts *, + struct data_update_opts *); void bch2_data_update_exit(struct data_update *); int bch2_data_update_init(struct btree_trans *, struct btree_iter *, diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index cc0d22085aef..c4e91d123849 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -978,31 +978,54 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke return NULL; } -void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr) +static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts, + struct bch_extent_ptr *ptr) +{ + if (!opts->promote_target || + !bch2_dev_in_target(c, ptr->dev, opts->promote_target)) + return false; + + struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev); + + return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr); +} + +void bch2_extent_ptr_set_cached(struct bch_fs *c, + struct bch_io_opts *opts, + struct bkey_s k, + struct bch_extent_ptr *ptr) { struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *entry; - union bch_extent_entry *ec = NULL; + struct extent_ptr_decoded p; - bkey_extent_entry_for_each(ptrs, entry) { - if (&entry->ptr == ptr) { - ptr->cached = true; - if (ec) - extent_entry_drop(k, ec); - return; - } - - if (extent_entry_is_stripe_ptr(entry)) - ec = entry; - else if (extent_entry_is_ptr(entry)) - ec = NULL; + rcu_read_lock(); + if (!want_cached_ptr(c, opts, ptr)) { + bch2_bkey_drop_ptr_noerror(k, ptr); + goto out; } + /* + * Stripes can't contain cached data, for - reasons. + * + * Possibly something we can fix in the future? + */ + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (&entry->ptr == ptr) { + if (p.has_ec) + bch2_bkey_drop_ptr_noerror(k, ptr); + else + ptr->cached = true; + goto out; + } + BUG(); +out: + rcu_read_unlock(); } /* - * bch_extent_normalize - clean up an extent, dropping stale pointers etc. + * bch2_extent_normalize - clean up an extent, dropping stale pointers etc. * * Returns true if @k should be dropped entirely * @@ -1016,8 +1039,39 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) rcu_read_lock(); bch2_bkey_drop_ptrs(k, ptr, ptr->cached && - (ca = bch2_dev_rcu(c, ptr->dev)) && - dev_ptr_stale_rcu(ca, ptr) > 0); + (!(ca = bch2_dev_rcu(c, ptr->dev)) || + dev_ptr_stale_rcu(ca, ptr) > 0)); + rcu_read_unlock(); + + return bkey_deleted(k.k); +} + +/* + * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc. + * + * Like bch2_extent_normalize(), but also only keeps a single cached pointer on + * the promote target. + */ +bool bch2_extent_normalize_by_opts(struct bch_fs *c, + struct bch_io_opts *opts, + struct bkey_s k) +{ + struct bkey_ptrs ptrs; + bool have_cached_ptr; + + rcu_read_lock(); +restart_drop_ptrs: + ptrs = bch2_bkey_ptrs(k); + have_cached_ptr = false; + + bkey_for_each_ptr(ptrs, ptr) + if (ptr->cached) { + if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) { + bch2_bkey_drop_ptr(k, ptr); + goto restart_drop_ptrs; + } + have_cached_ptr = true; + } rcu_read_unlock(); return bkey_deleted(k.k); diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 923a5f1849a8..bcffcf60aaaf 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -686,9 +686,12 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c); struct bch_extent_ptr * bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s); -void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *); +void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *, + struct bkey_s, struct bch_extent_ptr *); +bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s); bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); + void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 8c456d8b8b99..0ef4a86850bb 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -266,7 +266,7 @@ int bch2_move_extent(struct moving_context *ctxt, if (!data_opts.rewrite_ptrs && !data_opts.extra_replicas) { if (data_opts.kill_ptrs) - return bch2_extent_drop_ptrs(trans, iter, k, data_opts); + return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts); return 0; }