bcachefs: Redo checks for sufficient devices

When the replicas mechanism was added to track which drives each piece of
data is replicated on, the check for whether we have sufficient devices was
never updated to make use of it. This patch finally does that.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2021-02-06 23:17:26 -05:00 committed by Kent Overstreet
parent 5d428c7c64
commit fcb3431be8
7 changed files with 50 additions and 109 deletions

View File

@ -14,6 +14,9 @@
/*
 * Caller accepts a non-metadata replicas entry that has fewer devices online
 * than it lists (see bch2_have_enough_devs()).
 */
#define BCH_FORCE_IF_DATA_DEGRADED (1 << 2)
/*
 * Same as above, for metadata replicas entries (data_type below
 * BCH_DATA_user).
 */
#define BCH_FORCE_IF_METADATA_DEGRADED (1 << 3)
/*
 * Both "lost" flags combined: caller accepts replicas entries with fewer
 * devices online than nr_required, for data and metadata alike.
 */
#define BCH_FORCE_IF_LOST \
(BCH_FORCE_IF_DATA_LOST| \
BCH_FORCE_IF_METADATA_LOST)
/*
 * Both "degraded" flags combined: caller accepts replicas entries with some
 * devices offline, for data and metadata alike.
 */
#define BCH_FORCE_IF_DEGRADED \
(BCH_FORCE_IF_DATA_DEGRADED| \
BCH_FORCE_IF_METADATA_DEGRADED)

View File

@ -222,6 +222,11 @@ enum opt_type {
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Allow mounting in degraded mode") \
x(very_degraded, u8, \
OPT_MOUNT, \
OPT_BOOL(), \
NO_SB_OPT, false, \
NULL, "Allow mounting in when data will be missing") \
x(discard, u8, \
OPT_MOUNT|OPT_DEVICE, \
OPT_BOOL(), \

View File

@ -967,94 +967,48 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
/* Query replicas: */
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
struct bch_devs_mask online_devs)
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
unsigned flags, bool print)
{
struct bch_sb_field_members *mi;
struct bch_replicas_entry *e;
unsigned i, nr_online, nr_offline;
struct replicas_status ret;
memset(&ret, 0, sizeof(ret));
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
ret.replicas[i].redundancy = INT_MAX;
mi = bch2_sb_get_members(c->disk_sb.sb);
bool ret = true;
percpu_down_read(&c->mark_lock);
for_each_cpu_replicas_entry(&c->replicas, e) {
if (e->data_type >= ARRAY_SIZE(ret.replicas))
panic("e %p data_type %u\n", e, e->data_type);
unsigned i, nr_online = 0, dflags = 0;
bool metadata = e->data_type < BCH_DATA_user;
nr_online = nr_offline = 0;
for (i = 0; i < e->nr_devs; i++)
nr_online += test_bit(e->devs[i], devs.d);
for (i = 0; i < e->nr_devs; i++) {
BUG_ON(!bch2_dev_exists(c->disk_sb.sb, mi,
e->devs[i]));
if (nr_online < e->nr_required)
dflags |= metadata
? BCH_FORCE_IF_METADATA_LOST
: BCH_FORCE_IF_DATA_LOST;
if (test_bit(e->devs[i], online_devs.d))
nr_online++;
else
nr_offline++;
if (nr_online < e->nr_devs)
dflags |= metadata
? BCH_FORCE_IF_METADATA_DEGRADED
: BCH_FORCE_IF_DATA_DEGRADED;
if (dflags & ~flags) {
if (print) {
char buf[100];
bch2_replicas_entry_to_text(&PBUF(buf), e);
bch_err(c, "insufficient devices online (%u) for replicas entry %s",
nr_online, buf);
}
ret = false;
break;
}
ret.replicas[e->data_type].redundancy =
min(ret.replicas[e->data_type].redundancy,
(int) nr_online - (int) e->nr_required);
ret.replicas[e->data_type].nr_offline =
max(ret.replicas[e->data_type].nr_offline,
nr_offline);
}
percpu_up_read(&c->mark_lock);
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
if (ret.replicas[i].redundancy == INT_MAX)
ret.replicas[i].redundancy = 0;
return ret;
}
struct replicas_status bch2_replicas_status(struct bch_fs *c)
{
return __bch2_replicas_status(c, bch2_online_devs(c));
}
static bool have_enough_devs(struct replicas_status s,
enum bch_data_type type,
bool force_if_degraded,
bool force_if_lost)
{
return (!s.replicas[type].nr_offline || force_if_degraded) &&
(s.replicas[type].redundancy >= 0 || force_if_lost);
}
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
{
return (have_enough_devs(s, BCH_DATA_journal,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
have_enough_devs(s, BCH_DATA_btree,
flags & BCH_FORCE_IF_METADATA_DEGRADED,
flags & BCH_FORCE_IF_METADATA_LOST) &&
have_enough_devs(s, BCH_DATA_user,
flags & BCH_FORCE_IF_DATA_DEGRADED,
flags & BCH_FORCE_IF_DATA_LOST));
}
int bch2_replicas_online(struct bch_fs *c, bool meta)
{
struct replicas_status s = bch2_replicas_status(c);
return (meta
? min(s.replicas[BCH_DATA_journal].redundancy,
s.replicas[BCH_DATA_btree].redundancy)
: s.replicas[BCH_DATA_user].redundancy) + 1;
}
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
struct bch_replicas_entry *e;

View File

@ -39,19 +39,9 @@ static inline void bch2_replicas_entry_cached(struct bch_replicas_entry *e,
e->devs[0] = dev;
}
struct replicas_status {
struct {
int redundancy;
unsigned nr_offline;
} replicas[BCH_DATA_NR];
};
bool bch2_have_enough_devs(struct bch_fs *, struct bch_devs_mask,
unsigned, bool);
struct replicas_status __bch2_replicas_status(struct bch_fs *,
struct bch_devs_mask);
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);
int bch2_replicas_online(struct bch_fs *, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int);

View File

@ -770,15 +770,13 @@ int bch2_write_super(struct bch_fs *c)
nr_wrote = dev_mask_nr(&sb_written);
can_mount_with_written =
bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
BCH_FORCE_IF_DEGRADED);
bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
for (i = 0; i < ARRAY_SIZE(sb_written.d); i++)
sb_written.d[i] = ~sb_written.d[i];
can_mount_without_written =
bch2_have_enough_devs(__bch2_replicas_status(c, sb_written),
BCH_FORCE_IF_DEGRADED);
bch2_have_enough_devs(c, sb_written, BCH_FORCE_IF_DEGRADED, false);
/*
* If we would be able to mount _without_ the devices we successfully
@ -789,6 +787,7 @@ int bch2_write_super(struct bch_fs *c)
* mount with the devices we did successfully write to:
*/
if (bch2_fs_fatal_err_on(!nr_wrote ||
!can_mount_with_written ||
(can_mount_without_written &&
!can_mount_with_written), c,
"Unable to write superblock to sufficient devices"))

View File

@ -1265,7 +1265,6 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
struct bch_devs_mask new_online_devs;
struct replicas_status s;
struct bch_dev *ca2;
int i, nr_rw = 0, required;
@ -1301,9 +1300,7 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
new_online_devs = bch2_online_devs(c);
__clear_bit(ca->dev_idx, new_online_devs.d);
s = __bch2_replicas_status(c, new_online_devs);
return bch2_have_enough_devs(s, flags);
return bch2_have_enough_devs(c, new_online_devs, flags, false);
default:
BUG();
}
@ -1311,14 +1308,18 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
static bool bch2_fs_may_start(struct bch_fs *c)
{
struct replicas_status s;
struct bch_sb_field_members *mi;
struct bch_dev *ca;
unsigned i, flags = c->opts.degraded
? BCH_FORCE_IF_DEGRADED
: 0;
unsigned i, flags = 0;
if (!c->opts.degraded) {
if (c->opts.very_degraded)
flags |= BCH_FORCE_IF_DEGRADED|BCH_FORCE_IF_LOST;
if (c->opts.degraded)
flags |= BCH_FORCE_IF_DEGRADED;
if (!c->opts.degraded &&
!c->opts.very_degraded) {
mutex_lock(&c->sb_lock);
mi = bch2_sb_get_members(c->disk_sb.sb);
@ -1338,9 +1339,7 @@ static bool bch2_fs_may_start(struct bch_fs *c)
mutex_unlock(&c->sb_lock);
}
s = bch2_replicas_status(c);
return bch2_have_enough_devs(s, flags);
return bch2_have_enough_devs(c, bch2_online_devs(c), flags, true);
}
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)

View File

@ -199,9 +199,6 @@ read_attribute(new_stripes);
rw_attribute(pd_controllers_update_seconds);
read_attribute(meta_replicas_have);
read_attribute(data_replicas_have);
read_attribute(io_timers_read);
read_attribute(io_timers_write);
@ -347,9 +344,6 @@ SHOW(bch2_fs)
sysfs_print(promote_whole_extents, c->promote_whole_extents);
sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
/* Debugging: */
if (attr == &sysfs_alloc_debug)
@ -520,9 +514,6 @@ struct attribute *bch2_fs_files[] = {
&sysfs_btree_node_size,
&sysfs_btree_cache_size,
&sysfs_meta_replicas_have,
&sysfs_data_replicas_have,
&sysfs_journal_write_delay_ms,
&sysfs_journal_reclaim_delay_ms,