bcachefs: Mark stripe buckets with correct data type
Currently, we don't use bucket data type for tracking whether buckets are part of a stripe; parity buckets are BCH_DATA_parity, but data buckets in a stripe are BCH_DATA_user. There's a separate counter, buckets_ec, outside the BCH_DATA_TYPES system, for tracking the number of buckets on a device that are part of a stripe.

The trouble with this approach is that it's too coarse grained, and we need better information on fragmentation for debugging copygc.

With this patch, data buckets in a stripe are now tracked as BCH_DATA_stripe buckets.

This doesn't yet differentiate between erasure coded and non-erasure coded data in a stripe bucket, nor do we yet track empty data buckets in stripes.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 3329cf1bb9
commit 910659763e
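To see what the patch changes before reading the per-file diffs, here is a minimal standalone sketch of the new classification order. The enum and function below are made up to mirror `__alloc_data_type()`, not the kernel code: the stripe check now runs before the dirty/cached checks, so a data bucket in a stripe reads back as stripe rather than user, while parity buckets keep their parity type.

```c
#include <stdbool.h>
#include <stdio.h>

/* Made-up stand-ins for BCH_DATA_*; the ordering is arbitrary. */
enum data_type { NONE, USER, CACHED, PARITY, STRIPE };

/* Mirrors the patched __alloc_data_type(): stripe membership is
 * checked ahead of dirty_sectors. */
static enum data_type classify(unsigned dirty_sectors, unsigned cached_sectors,
			       bool in_stripe, enum data_type type)
{
	if (in_stripe)
		return type == PARITY ? type : STRIPE;
	if (dirty_sectors)
		return type;
	if (cached_sectors)
		return CACHED;
	return NONE;
}

int main(void)
{
	/* data bucket in a stripe: was USER before the patch, now STRIPE */
	printf("%d\n", classify(128, 0, true, USER));	/* prints 4 (STRIPE) */
	/* plain dirty bucket: unchanged */
	printf("%d\n", classify(128, 0, false, USER));	/* prints 1 (USER) */
	return 0;
}
```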
fs/bcachefs/alloc_background.h

```diff
@@ -44,10 +44,10 @@ static inline enum bch_data_type __alloc_data_type(u32 dirty_sectors,
 					    struct bch_alloc_v4 a,
 					    enum bch_data_type data_type)
 {
+	if (stripe)
+		return data_type == BCH_DATA_parity ? data_type : BCH_DATA_stripe;
 	if (dirty_sectors)
 		return data_type;
-	if (stripe)
-		return BCH_DATA_stripe;
 	if (cached_sectors)
 		return BCH_DATA_cached;
 	if (BCH_ALLOC_V4_NEED_DISCARD(&a))
@@ -64,19 +64,31 @@ static inline enum bch_data_type alloc_data_type(struct bch_alloc_v4 a,
 				 a.stripe, a, data_type);
 }
 
+static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
+{
+	return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
+}
+
 static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
 {
 	return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
 }
 
+#define DATA_TYPES_MOVABLE		\
+	((1U << BCH_DATA_btree)|	\
+	 (1U << BCH_DATA_user)|		\
+	 (1U << BCH_DATA_stripe))
+
+static inline bool data_type_movable(enum bch_data_type type)
+{
+	return (1U << type) & DATA_TYPES_MOVABLE;
+}
+
 static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
 					      struct bch_dev *ca)
 {
-	if (a.data_type != BCH_DATA_btree &&
-	    a.data_type != BCH_DATA_user)
-		return 0;
-
-	if (a.dirty_sectors >= ca->mi.bucket_size)
+	if (!data_type_movable(a.data_type) ||
+	    a.dirty_sectors >= ca->mi.bucket_size)
 		return 0;
 
 	return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
```
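`alloc_lru_idx_fragmentation()` keys the fragmentation LRU on a fixed-point fill fraction, dirty_sectors * 2^31 / bucket_size, so emptier movable buckets (cheaper to evacuate) sort first; an index of 0 keeps a bucket off the LRU entirely, which is why full buckets return 0. A quick standalone check of the arithmetic — the 2048-sector (1 MiB) bucket size here is an assumption, not taken from any real device:

```c
#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as alloc_lru_idx_fragmentation(), minus the kernel
 * helpers (plain division instead of div_u64()). */
static uint64_t frag_idx(uint32_t dirty_sectors, uint32_t bucket_size)
{
	if (dirty_sectors >= bucket_size)
		return 0;	/* full buckets stay off the fragmentation LRU */
	return (uint64_t) dirty_sectors * (1ULL << 31) / bucket_size;
}

int main(void)
{
	uint32_t bucket_size = 2048;	/* assumed: 2048 sectors = 1 MiB */

	/* 25% full sorts well before 75% full, so copygc sees it first */
	printf("%llu\n", (unsigned long long) frag_idx(512, bucket_size));  /* 536870912 */
	printf("%llu\n", (unsigned long long) frag_idx(1536, bucket_size)); /* 1610612736 */
	return 0;
}
```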
fs/bcachefs/backpointers.h

```diff
@@ -96,12 +96,20 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
 	return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k->k_i);
 }
 
+static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
+						    struct bkey_s_c k, struct extent_ptr_decoded p)
+{
+	return  level		? BCH_DATA_btree :
+		p.has_ec	? BCH_DATA_stripe :
+				  BCH_DATA_user;
+}
+
 static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
 			   enum btree_id btree_id, unsigned level,
 			   struct bkey_s_c k, struct extent_ptr_decoded p,
 			   struct bpos *bucket_pos, struct bch_backpointer *bp)
 {
-	enum bch_data_type data_type = level ? BCH_DATA_btree : BCH_DATA_user;
+	enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
 	s64 sectors = level ? btree_sectors(c) : k.k->size;
 	u32 bucket_offset;
 
```
fs/bcachefs/btree_gc.c

```diff
@@ -633,8 +633,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
 		if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen)
 			continue;
 
-		if (fsck_err_on(g->data_type &&
-				g->data_type != data_type, c,
+		if (fsck_err_on(bucket_data_type(g->data_type) &&
+				bucket_data_type(g->data_type) != data_type, c,
 				"bucket %u:%zu different types of data in same bucket: %s, %s\n"
 				"while marking %s",
 				p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr),
@@ -1397,6 +1397,16 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	if (gen_after(old->gen, gc.gen))
 		return 0;
 
+	if (c->opts.reconstruct_alloc ||
+	    fsck_err_on(new.data_type != gc.data_type, c,
+			"bucket %llu:%llu gen %u has wrong data_type"
+			": got %s, should be %s",
+			iter->pos.inode, iter->pos.offset,
+			gc.gen,
+			bch2_data_types[new.data_type],
+			bch2_data_types[gc.data_type]))
+		new.data_type = gc.data_type;
+
 #define copy_bucket_field(_f)						\
 	if (c->opts.reconstruct_alloc ||				\
 	    fsck_err_on(new._f != gc._f, c,				\
@@ -1409,7 +1419,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 			new._f = gc._f;					\
 
 	copy_bucket_field(gen);
-	copy_bucket_field(data_type);
 	copy_bucket_field(dirty_sectors);
 	copy_bucket_field(cached_sectors);
 	copy_bucket_field(stripe_redundancy);
```
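The new data_type check follows the same check-and-repair shape as the `copy_bucket_field()` macro below it, but with its own fsck message so the type names can be printed; data_type is then dropped from the generic field-copy list. A rough standalone sketch of that macro pattern — hypothetical types, plain stderr in place of `fsck_err_on()`, and no reconstruct_alloc handling:

```c
#include <stdio.h>

struct bucket_state { unsigned gen, dirty_sectors; };

/* Stand-in for fsck_err_on(): report the mismatch, answer "repair". */
static int report(const char *field, unsigned got, unsigned want)
{
	fprintf(stderr, "bucket has wrong %s: got %u, should be %u\n",
		field, got, want);
	return 1;
}

/* On mismatch, report and copy the GC-computed value over the stale one. */
#define copy_field(_f)						\
	if (disk->_f != gc->_f &&				\
	    report(#_f, disk->_f, gc->_f))			\
		disk->_f = gc->_f

static void repair(struct bucket_state *disk, const struct bucket_state *gc)
{
	copy_field(gen);
	copy_field(dirty_sectors);
}

int main(void)
{
	struct bucket_state disk = { 1, 100 }, gc = { 1, 64 };

	repair(&disk, &gc);
	printf("dirty_sectors now %u\n", disk.dirty_sectors);	/* 64 */
	return 0;
}
```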
fs/bcachefs/buckets.c

```diff
@@ -776,7 +776,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
 	unsigned nr_data = s->nr_blocks - s->nr_redundant;
 	bool parity = ptr_idx >= nr_data;
-	enum bch_data_type data_type = parity ? BCH_DATA_parity : 0;
+	enum bch_data_type data_type = parity ? BCH_DATA_parity : BCH_DATA_stripe;
 	s64 sectors = parity ? le16_to_cpu(s->sectors) : 0;
 	const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
@@ -811,8 +811,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	if (data_type)
-		g->data_type = data_type;
+	g->data_type = data_type;
 	g->dirty_sectors += sectors;
 
 	g->stripe		= k.k->p.offset;
@@ -851,15 +850,17 @@ static int __mark_pointer(struct btree_trans *trans,
 }
 
 static int bch2_mark_pointer(struct btree_trans *trans,
+			     enum btree_id btree_id, unsigned level,
 			     struct bkey_s_c k,
 			     struct extent_ptr_decoded p,
-			     s64 sectors, enum bch_data_type data_type,
+			     s64 sectors,
 			     unsigned flags)
 {
 	u64 journal_seq = trans->journal_res.seq;
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
 	struct bucket old, new, *g;
+	enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
 	u8 bucket_data_type;
 	int ret = 0;
 
@@ -963,8 +964,7 @@ int bch2_mark_extent(struct btree_trans *trans,
 		if (flags & BTREE_TRIGGER_OVERWRITE)
 			disk_sectors = -disk_sectors;
 
-		ret = bch2_mark_pointer(trans, k, p, disk_sectors,
-					data_type, flags);
+		ret = bch2_mark_pointer(trans, btree_id, level, k, p, disk_sectors, flags);
 		if (ret < 0)
 			return ret;
 
@@ -1596,6 +1596,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 
 		a->v.stripe		= s.k->p.offset;
 		a->v.stripe_redundancy	= s.v->nr_redundant;
+		a->v.data_type		= BCH_DATA_stripe;
 	} else {
 		if (bch2_trans_inconsistent_on(a->v.stripe != s.k->p.offset ||
 					       a->v.stripe_redundancy != s.v->nr_redundant, trans,
@@ -1608,6 +1609,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 
 		a->v.stripe		= 0;
 		a->v.stripe_redundancy	= 0;
+		a->v.data_type		= alloc_data_type(a->v, BCH_DATA_user);
 	}
 
 	a->v.dirty_sectors += sectors;
```
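Note that on stripe deletion the bucket's type isn't blindly reset to BCH_DATA_user: `alloc_data_type()` recomputes it from the bucket's remaining state now that the stripe field is cleared. A tiny sketch, with assumed and simplified semantics (field and enum names made up, not taken from bcachefs), of why recomputing matters:

```c
/* Simplified stand-in for alloc_data_type(). */
enum data_type { NONE, USER, CACHED, STRIPE };

struct alloc { unsigned stripe, dirty_sectors, cached_sectors; };

static enum data_type alloc_data_type(struct alloc a, enum data_type hint)
{
	if (a.stripe)
		return STRIPE;		/* still referenced by a stripe */
	if (a.dirty_sectors)
		return hint;		/* falls back to the caller's hint */
	return a.cached_sectors ? CACHED : NONE;
}

int main(void)
{
	/* stripe just deleted: stripe == 0, so the dirty data wins */
	struct alloc a = { .stripe = 0, .dirty_sectors = 128 };

	return alloc_data_type(a, USER) == USER ? 0 : 1;	/* exits 0 */
}
```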
fs/bcachefs/movinggc.c

```diff
@@ -55,8 +55,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
 
 	a = bch2_alloc_to_v4(k, &_a);
 	*gen = a->gen;
-	ret = (a->data_type == BCH_DATA_btree ||
-	       a->data_type == BCH_DATA_user) &&
+	ret = data_type_movable(a->data_type) &&
 	      a->fragmentation_lru &&
 	      a->fragmentation_lru <= time;
 
@@ -158,13 +157,18 @@ unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
 	struct bch_dev *ca;
 	unsigned dev_idx;
 	s64 wait = S64_MAX, fragmented_allowed, fragmented;
+	unsigned i;
 
 	for_each_rw_member(ca, c, dev_idx) {
 		struct bch_dev_usage usage = bch2_dev_usage_read(ca);
 
 		fragmented_allowed = ((__dev_buckets_available(ca, usage, RESERVE_none) *
 				       ca->mi.bucket_size) >> 1);
-		fragmented = usage.d[BCH_DATA_user].fragmented;
+		fragmented = 0;
+
+		for (i = 0; i < BCH_DATA_NR; i++)
+			if (data_type_movable(i))
+				fragmented += usage.d[i].fragmented;
 
 		wait = min(wait, max(0LL, fragmented_allowed - fragmented));
 	}
```
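`bch2_copygc_wait_amount()` still tolerates fragmented data up to half of the free space on a device, but "fragmented" now sums over every movable data type (btree, user, stripe) rather than user alone, so fragmentation in erasure-coded buckets also pushes copygc to run. Illustrative arithmetic with made-up device numbers — none of these figures come from the patch:

```c
#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the BCH_DATA_* types and DATA_TYPES_MOVABLE mask. */
enum { DATA_BTREE, DATA_USER, DATA_STRIPE, DATA_CACHED, DATA_NR };

#define MOVABLE ((1U << DATA_BTREE) | (1U << DATA_USER) | (1U << DATA_STRIPE))

int main(void)
{
	/* hypothetical device: 1000 free buckets of 2048 sectors each;
	 * the >>1 reserves half of free space as headroom, as in the patch */
	int64_t fragmented_allowed = (1000LL * 2048) >> 1;
	int64_t fragmented = 0, d[DATA_NR] = { 4096, 900000, 200000, 50000 };

	for (unsigned i = 0; i < DATA_NR; i++)
		if ((1U << i) & MOVABLE)
			fragmented += d[i];	/* cached is excluded */

	/* negative means fragmentation already exceeds the allowance,
	 * i.e. copygc should run immediately */
	int64_t wait = fragmented_allowed - fragmented;
	printf("wait = %lld sectors\n", (long long)(wait > 0 ? wait : 0));
	return 0;
}
```

With stripe buckets counted, the example's fragmented total (1,104,096 sectors) exceeds the allowance (1,024,000), so the wait clamps to 0; before the patch the 200,000 stripe sectors would have been invisible to this heuristic.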