mirror of
https://github.com/torvalds/linux.git
synced 2024-11-26 06:02:05 +00:00
bcachefs: Kill journal pre-reservations
This deletes the complicated and somewhat expensive journal pre-reservation machinery in favor of just using journal watermarks: when the journal is more than half full, we run journal reclaim more aggressively, and when the journal is more than three-quarters full, we only allow journal reclaim to get new journal reservations.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
701ff57eb3
commit
006ccc3090
@ -3087,8 +3087,6 @@ void bch2_trans_put(struct btree_trans *trans)
|
|||||||
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
|
|
||||||
|
|
||||||
kfree(trans->extra_journal_entries.data);
|
kfree(trans->extra_journal_entries.data);
|
||||||
|
|
||||||
if (trans->fs_usage_deltas) {
|
if (trans->fs_usage_deltas) {
|
||||||
|
@ -672,7 +672,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
|
|||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
bch2_journal_pin_drop(j, &ck->journal);
|
bch2_journal_pin_drop(j, &ck->journal);
|
||||||
bch2_journal_preres_put(j, &ck->res);
|
|
||||||
|
|
||||||
BUG_ON(!btree_node_locked(c_iter.path, 0));
|
BUG_ON(!btree_node_locked(c_iter.path, 0));
|
||||||
|
|
||||||
@ -770,18 +769,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
|
|||||||
|
|
||||||
BUG_ON(insert->k.u64s > ck->u64s);
|
BUG_ON(insert->k.u64s > ck->u64s);
|
||||||
|
|
||||||
if (likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY))) {
|
|
||||||
int difference;
|
|
||||||
|
|
||||||
BUG_ON(jset_u64s(insert->k.u64s) > trans->journal_preres.u64s);
|
|
||||||
|
|
||||||
difference = jset_u64s(insert->k.u64s) - ck->res.u64s;
|
|
||||||
if (difference > 0) {
|
|
||||||
trans->journal_preres.u64s -= difference;
|
|
||||||
ck->res.u64s += difference;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bkey_copy(ck->k, insert);
|
bkey_copy(ck->k, insert);
|
||||||
ck->valid = true;
|
ck->valid = true;
|
||||||
|
|
||||||
@ -1006,7 +993,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc)
|
|||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
bch2_journal_pin_drop(&c->journal, &ck->journal);
|
||||||
bch2_journal_preres_put(&c->journal, &ck->res);
|
|
||||||
|
|
||||||
list_del(&ck->list);
|
list_del(&ck->list);
|
||||||
kfree(ck->k);
|
kfree(ck->k);
|
||||||
|
@ -323,17 +323,6 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
|
|||||||
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
|
bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
|
||||||
}
|
}
|
||||||
|
|
||||||
static noinline int
|
|
||||||
bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned flags,
|
|
||||||
unsigned long trace_ip)
|
|
||||||
{
|
|
||||||
return drop_locks_do(trans,
|
|
||||||
bch2_journal_preres_get(&trans->c->journal,
|
|
||||||
&trans->journal_preres,
|
|
||||||
trans->journal_preres_u64s,
|
|
||||||
(flags & BCH_WATERMARK_MASK)));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
|
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
|
||||||
unsigned flags)
|
unsigned flags)
|
||||||
{
|
{
|
||||||
@ -882,14 +871,6 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = bch2_journal_preres_get(&c->journal,
|
|
||||||
&trans->journal_preres, trans->journal_preres_u64s,
|
|
||||||
(flags & BCH_WATERMARK_MASK)|JOURNAL_RES_GET_NONBLOCK);
|
|
||||||
if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
|
|
||||||
ret = bch2_trans_journal_preres_get_cold(trans, flags, trace_ip);
|
|
||||||
if (unlikely(ret))
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
ret = bch2_trans_lock_write(trans);
|
ret = bch2_trans_lock_write(trans);
|
||||||
if (unlikely(ret))
|
if (unlikely(ret))
|
||||||
return ret;
|
return ret;
|
||||||
@ -1052,7 +1033,6 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
|||||||
struct bch_fs *c = trans->c;
|
struct bch_fs *c = trans->c;
|
||||||
struct btree_insert_entry *i = NULL;
|
struct btree_insert_entry *i = NULL;
|
||||||
struct btree_write_buffered_key *wb;
|
struct btree_write_buffered_key *wb;
|
||||||
unsigned u64s;
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
|
||||||
if (!trans->nr_updates &&
|
if (!trans->nr_updates &&
|
||||||
@ -1112,13 +1092,8 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
|||||||
|
|
||||||
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
|
EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
|
||||||
|
|
||||||
memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
|
|
||||||
|
|
||||||
trans->journal_u64s = trans->extra_journal_entries.nr;
|
trans->journal_u64s = trans->extra_journal_entries.nr;
|
||||||
trans->journal_preres_u64s = 0;
|
|
||||||
|
|
||||||
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
|
trans->journal_transaction_names = READ_ONCE(c->opts.journal_transaction_names);
|
||||||
|
|
||||||
if (trans->journal_transaction_names)
|
if (trans->journal_transaction_names)
|
||||||
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
|
trans->journal_u64s += jset_u64s(JSET_ENTRY_LOG_U64s);
|
||||||
|
|
||||||
@ -1134,16 +1109,11 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
|
|||||||
if (i->key_cache_already_flushed)
|
if (i->key_cache_already_flushed)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* we're going to journal the key being updated: */
|
|
||||||
u64s = jset_u64s(i->k->k.u64s);
|
|
||||||
if (i->cached &&
|
|
||||||
likely(!(flags & BTREE_INSERT_JOURNAL_REPLAY)))
|
|
||||||
trans->journal_preres_u64s += u64s;
|
|
||||||
|
|
||||||
if (i->flags & BTREE_UPDATE_NOJOURNAL)
|
if (i->flags & BTREE_UPDATE_NOJOURNAL)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
trans->journal_u64s += u64s;
|
/* we're going to journal the key being updated: */
|
||||||
|
trans->journal_u64s += jset_u64s(i->k->k.u64s);
|
||||||
|
|
||||||
/* and we're also going to log the overwrite: */
|
/* and we're also going to log the overwrite: */
|
||||||
if (trans->journal_transaction_names)
|
if (trans->journal_transaction_names)
|
||||||
@ -1175,8 +1145,6 @@ retry:
|
|||||||
|
|
||||||
trace_and_count(c, transaction_commit, trans, _RET_IP_);
|
trace_and_count(c, transaction_commit, trans, _RET_IP_);
|
||||||
out:
|
out:
|
||||||
bch2_journal_preres_put(&c->journal, &trans->journal_preres);
|
|
||||||
|
|
||||||
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
|
if (likely(!(flags & BTREE_INSERT_NOCHECK_RW)))
|
||||||
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
|
bch2_write_ref_put(c, BCH_WRITE_REF_trans);
|
||||||
out_reset:
|
out_reset:
|
||||||
|
@ -327,7 +327,6 @@ struct bkey_cached {
|
|||||||
struct rhash_head hash;
|
struct rhash_head hash;
|
||||||
struct list_head list;
|
struct list_head list;
|
||||||
|
|
||||||
struct journal_preres res;
|
|
||||||
struct journal_entry_pin journal;
|
struct journal_entry_pin journal;
|
||||||
u64 seq;
|
u64 seq;
|
||||||
|
|
||||||
@ -441,11 +440,9 @@ struct btree_trans {
|
|||||||
struct journal_entry_pin *journal_pin;
|
struct journal_entry_pin *journal_pin;
|
||||||
|
|
||||||
struct journal_res journal_res;
|
struct journal_res journal_res;
|
||||||
struct journal_preres journal_preres;
|
|
||||||
u64 *journal_seq;
|
u64 *journal_seq;
|
||||||
struct disk_reservation *disk_res;
|
struct disk_reservation *disk_res;
|
||||||
unsigned journal_u64s;
|
unsigned journal_u64s;
|
||||||
unsigned journal_preres_u64s;
|
|
||||||
struct replicas_delta_list *fs_usage_deltas;
|
struct replicas_delta_list *fs_usage_deltas;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -513,8 +513,6 @@ static void bch2_btree_update_free(struct btree_update *as, struct btree_trans *
|
|||||||
up_read(&c->gc_lock);
|
up_read(&c->gc_lock);
|
||||||
as->took_gc_lock = false;
|
as->took_gc_lock = false;
|
||||||
|
|
||||||
bch2_journal_preres_put(&c->journal, &as->journal_preres);
|
|
||||||
|
|
||||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||||
bch2_journal_pin_flush(&c->journal, &as->journal);
|
bch2_journal_pin_flush(&c->journal, &as->journal);
|
||||||
bch2_disk_reservation_put(c, &as->disk_res);
|
bch2_disk_reservation_put(c, &as->disk_res);
|
||||||
@ -734,8 +732,6 @@ err:
|
|||||||
|
|
||||||
bch2_journal_pin_drop(&c->journal, &as->journal);
|
bch2_journal_pin_drop(&c->journal, &as->journal);
|
||||||
|
|
||||||
bch2_journal_preres_put(&c->journal, &as->journal_preres);
|
|
||||||
|
|
||||||
mutex_lock(&c->btree_interior_update_lock);
|
mutex_lock(&c->btree_interior_update_lock);
|
||||||
for (i = 0; i < as->nr_new_nodes; i++) {
|
for (i = 0; i < as->nr_new_nodes; i++) {
|
||||||
b = as->new_nodes[i];
|
b = as->new_nodes[i];
|
||||||
@ -1047,7 +1043,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
|||||||
unsigned nr_nodes[2] = { 0, 0 };
|
unsigned nr_nodes[2] = { 0, 0 };
|
||||||
unsigned update_level = level;
|
unsigned update_level = level;
|
||||||
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
|
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
|
||||||
unsigned journal_flags = 0;
|
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
u32 restart_count = trans->restart_count;
|
u32 restart_count = trans->restart_count;
|
||||||
|
|
||||||
@ -1061,10 +1056,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
|||||||
flags &= ~BCH_WATERMARK_MASK;
|
flags &= ~BCH_WATERMARK_MASK;
|
||||||
flags |= watermark;
|
flags |= watermark;
|
||||||
|
|
||||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
|
|
||||||
journal_flags |= JOURNAL_RES_GET_NONBLOCK;
|
|
||||||
journal_flags |= watermark;
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
nr_nodes[!!update_level] += 1 + split;
|
nr_nodes[!!update_level] += 1 + split;
|
||||||
update_level++;
|
update_level++;
|
||||||
@ -1129,27 +1120,6 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
|
|||||||
if (ret)
|
if (ret)
|
||||||
goto err;
|
goto err;
|
||||||
|
|
||||||
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
|
||||||
BTREE_UPDATE_JOURNAL_RES,
|
|
||||||
journal_flags|JOURNAL_RES_GET_NONBLOCK);
|
|
||||||
if (ret) {
|
|
||||||
if (flags & BTREE_INSERT_JOURNAL_RECLAIM) {
|
|
||||||
ret = -BCH_ERR_journal_reclaim_would_deadlock;
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = drop_locks_do(trans,
|
|
||||||
bch2_journal_preres_get(&c->journal, &as->journal_preres,
|
|
||||||
BTREE_UPDATE_JOURNAL_RES,
|
|
||||||
journal_flags));
|
|
||||||
if (ret == -BCH_ERR_journal_preres_get_blocked) {
|
|
||||||
trace_and_count(c, trans_restart_journal_preres_get, trans, _RET_IP_, journal_flags);
|
|
||||||
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
|
|
||||||
}
|
|
||||||
if (ret)
|
|
||||||
goto err;
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = bch2_disk_reservation_get(c, &as->disk_res,
|
ret = bch2_disk_reservation_get(c, &as->disk_res,
|
||||||
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
|
(nr_nodes[0] + nr_nodes[1]) * btree_sectors(c),
|
||||||
c->opts.metadata_replicas,
|
c->opts.metadata_replicas,
|
||||||
|
@ -55,7 +55,6 @@ struct btree_update {
|
|||||||
unsigned update_level;
|
unsigned update_level;
|
||||||
|
|
||||||
struct disk_reservation disk_res;
|
struct disk_reservation disk_res;
|
||||||
struct journal_preres journal_preres;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BTREE_INTERIOR_UPDATING_NODE:
|
* BTREE_INTERIOR_UPDATING_NODE:
|
||||||
|
@ -526,36 +526,6 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* journal_preres: */
|
|
||||||
|
|
||||||
static bool journal_preres_available(struct journal *j,
|
|
||||||
struct journal_preres *res,
|
|
||||||
unsigned new_u64s,
|
|
||||||
unsigned flags)
|
|
||||||
{
|
|
||||||
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags, true);
|
|
||||||
|
|
||||||
if (!ret && mutex_trylock(&j->reclaim_lock)) {
|
|
||||||
bch2_journal_reclaim(j);
|
|
||||||
mutex_unlock(&j->reclaim_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
int __bch2_journal_preres_get(struct journal *j,
|
|
||||||
struct journal_preres *res,
|
|
||||||
unsigned new_u64s,
|
|
||||||
unsigned flags)
|
|
||||||
{
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
closure_wait_event(&j->preres_wait,
|
|
||||||
(ret = bch2_journal_error(j)) ||
|
|
||||||
journal_preres_available(j, res, new_u64s, flags));
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* journal_entry_res: */
|
/* journal_entry_res: */
|
||||||
|
|
||||||
void bch2_journal_entry_res_resize(struct journal *j,
|
void bch2_journal_entry_res_resize(struct journal *j,
|
||||||
@ -1306,7 +1276,6 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
|
|||||||
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
|
prt_printf(out, "last_seq:\t\t%llu\n", journal_last_seq(j));
|
||||||
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
|
prt_printf(out, "last_seq_ondisk:\t%llu\n", j->last_seq_ondisk);
|
||||||
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
|
prt_printf(out, "flushed_seq_ondisk:\t%llu\n", j->flushed_seq_ondisk);
|
||||||
prt_printf(out, "prereserved:\t\t%u/%u\n", j->prereserved.reserved, j->prereserved.remaining);
|
|
||||||
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
|
prt_printf(out, "watermark:\t\t%s\n", bch2_watermarks[j->watermark]);
|
||||||
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
|
prt_printf(out, "each entry reserved:\t%u\n", j->entry_u64s_reserved);
|
||||||
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
|
prt_printf(out, "nr flush writes:\t%llu\n", j->nr_flush_writes);
|
||||||
|
@ -395,104 +395,6 @@ out:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* journal_preres: */
|
|
||||||
|
|
||||||
static inline void journal_set_watermark(struct journal *j)
|
|
||||||
{
|
|
||||||
union journal_preres_state s = READ_ONCE(j->prereserved);
|
|
||||||
unsigned watermark = BCH_WATERMARK_stripe;
|
|
||||||
|
|
||||||
if (fifo_free(&j->pin) < j->pin.size / 4)
|
|
||||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
|
|
||||||
if (fifo_free(&j->pin) < j->pin.size / 8)
|
|
||||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
|
||||||
|
|
||||||
if (s.reserved > s.remaining)
|
|
||||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_copygc);
|
|
||||||
if (!s.remaining)
|
|
||||||
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
|
||||||
|
|
||||||
if (watermark == j->watermark)
|
|
||||||
return;
|
|
||||||
|
|
||||||
swap(watermark, j->watermark);
|
|
||||||
if (watermark > j->watermark)
|
|
||||||
journal_wake(j);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void bch2_journal_preres_put(struct journal *j,
|
|
||||||
struct journal_preres *res)
|
|
||||||
{
|
|
||||||
union journal_preres_state s = { .reserved = res->u64s };
|
|
||||||
|
|
||||||
if (!res->u64s)
|
|
||||||
return;
|
|
||||||
|
|
||||||
s.v = atomic64_sub_return(s.v, &j->prereserved.counter);
|
|
||||||
res->u64s = 0;
|
|
||||||
|
|
||||||
if (unlikely(s.waiting)) {
|
|
||||||
clear_bit(ilog2((((union journal_preres_state) { .waiting = 1 }).v)),
|
|
||||||
(unsigned long *) &j->prereserved.v);
|
|
||||||
closure_wake_up(&j->preres_wait);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (s.reserved <= s.remaining && j->watermark)
|
|
||||||
journal_set_watermark(j);
|
|
||||||
}
|
|
||||||
|
|
||||||
int __bch2_journal_preres_get(struct journal *,
|
|
||||||
struct journal_preres *, unsigned, unsigned);
|
|
||||||
|
|
||||||
static inline int bch2_journal_preres_get_fast(struct journal *j,
|
|
||||||
struct journal_preres *res,
|
|
||||||
unsigned new_u64s,
|
|
||||||
unsigned flags,
|
|
||||||
bool set_waiting)
|
|
||||||
{
|
|
||||||
int d = new_u64s - res->u64s;
|
|
||||||
union journal_preres_state old, new;
|
|
||||||
u64 v = atomic64_read(&j->prereserved.counter);
|
|
||||||
enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
do {
|
|
||||||
old.v = new.v = v;
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
if (watermark == BCH_WATERMARK_reclaim ||
|
|
||||||
new.reserved + d < new.remaining) {
|
|
||||||
new.reserved += d;
|
|
||||||
ret = 1;
|
|
||||||
} else if (set_waiting && !new.waiting)
|
|
||||||
new.waiting = true;
|
|
||||||
else
|
|
||||||
return 0;
|
|
||||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
|
||||||
old.v, new.v)) != old.v);
|
|
||||||
|
|
||||||
if (ret)
|
|
||||||
res->u64s += d;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int bch2_journal_preres_get(struct journal *j,
|
|
||||||
struct journal_preres *res,
|
|
||||||
unsigned new_u64s,
|
|
||||||
unsigned flags)
|
|
||||||
{
|
|
||||||
if (new_u64s <= res->u64s)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (bch2_journal_preres_get_fast(j, res, new_u64s, flags, false))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
if (flags & JOURNAL_RES_GET_NONBLOCK)
|
|
||||||
return -BCH_ERR_journal_preres_get_blocked;
|
|
||||||
|
|
||||||
return __bch2_journal_preres_get(j, res, new_u64s, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* journal_entry_res: */
|
/* journal_entry_res: */
|
||||||
|
|
||||||
void bch2_journal_entry_res_resize(struct journal *,
|
void bch2_journal_entry_res_resize(struct journal *,
|
||||||
|
@ -50,16 +50,21 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
|
|||||||
return available;
|
return available;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void journal_set_remaining(struct journal *j, unsigned u64s_remaining)
|
static inline void journal_set_watermark(struct journal *j, bool low_on_space)
|
||||||
{
|
{
|
||||||
union journal_preres_state old, new;
|
unsigned watermark = BCH_WATERMARK_stripe;
|
||||||
u64 v = atomic64_read(&j->prereserved.counter);
|
|
||||||
|
|
||||||
do {
|
if (low_on_space)
|
||||||
old.v = new.v = v;
|
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
||||||
new.remaining = u64s_remaining;
|
if (fifo_free(&j->pin) < j->pin.size / 4)
|
||||||
} while ((v = atomic64_cmpxchg(&j->prereserved.counter,
|
watermark = max_t(unsigned, watermark, BCH_WATERMARK_reclaim);
|
||||||
old.v, new.v)) != old.v);
|
|
||||||
|
if (watermark == j->watermark)
|
||||||
|
return;
|
||||||
|
|
||||||
|
swap(watermark, j->watermark);
|
||||||
|
if (watermark > j->watermark)
|
||||||
|
journal_wake(j);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct journal_space
|
static struct journal_space
|
||||||
@ -162,7 +167,6 @@ void bch2_journal_space_available(struct journal *j)
|
|||||||
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
struct bch_fs *c = container_of(j, struct bch_fs, journal);
|
||||||
struct bch_dev *ca;
|
struct bch_dev *ca;
|
||||||
unsigned clean, clean_ondisk, total;
|
unsigned clean, clean_ondisk, total;
|
||||||
s64 u64s_remaining = 0;
|
|
||||||
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
unsigned max_entry_size = min(j->buf[0].buf_size >> 9,
|
||||||
j->buf[1].buf_size >> 9);
|
j->buf[1].buf_size >> 9);
|
||||||
unsigned i, nr_online = 0, nr_devs_want;
|
unsigned i, nr_online = 0, nr_devs_want;
|
||||||
@ -222,16 +226,10 @@ void bch2_journal_space_available(struct journal *j)
|
|||||||
else
|
else
|
||||||
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
|
clear_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags);
|
||||||
|
|
||||||
u64s_remaining = (u64) clean << 6;
|
journal_set_watermark(j, clean * 4 <= total);
|
||||||
u64s_remaining -= (u64) total << 3;
|
|
||||||
u64s_remaining = max(0LL, u64s_remaining);
|
|
||||||
u64s_remaining /= 4;
|
|
||||||
u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
|
|
||||||
out:
|
out:
|
||||||
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
j->cur_entry_sectors = !ret ? j->space[journal_space_discarded].next_entry : 0;
|
||||||
j->cur_entry_error = ret;
|
j->cur_entry_error = ret;
|
||||||
journal_set_remaining(j, u64s_remaining);
|
|
||||||
journal_set_watermark(j);
|
|
||||||
|
|
||||||
if (!ret)
|
if (!ret)
|
||||||
journal_wake(j);
|
journal_wake(j);
|
||||||
@ -555,11 +553,6 @@ static u64 journal_seq_to_flush(struct journal *j)
|
|||||||
/* Try to keep the journal at most half full: */
|
/* Try to keep the journal at most half full: */
|
||||||
nr_buckets = ja->nr / 2;
|
nr_buckets = ja->nr / 2;
|
||||||
|
|
||||||
/* And include pre-reservations: */
|
|
||||||
nr_buckets += DIV_ROUND_UP(j->prereserved.reserved,
|
|
||||||
(ca->mi.bucket_size << 6) -
|
|
||||||
journal_entry_overhead(j));
|
|
||||||
|
|
||||||
nr_buckets = min(nr_buckets, ja->nr);
|
nr_buckets = min(nr_buckets, ja->nr);
|
||||||
|
|
||||||
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
|
bucket_to_flush = (ja->cur_idx + nr_buckets) % ja->nr;
|
||||||
@ -638,10 +631,7 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
|||||||
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
|
msecs_to_jiffies(c->opts.journal_reclaim_delay)))
|
||||||
min_nr = 1;
|
min_nr = 1;
|
||||||
|
|
||||||
if (j->prereserved.reserved * 4 > j->prereserved.remaining)
|
if (j->watermark != BCH_WATERMARK_stripe)
|
||||||
min_nr = 1;
|
|
||||||
|
|
||||||
if (fifo_free(&j->pin) <= 32)
|
|
||||||
min_nr = 1;
|
min_nr = 1;
|
||||||
|
|
||||||
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
|
if (atomic_read(&c->btree_cache.dirty) * 2 > c->btree_cache.used)
|
||||||
@ -652,8 +642,6 @@ static int __bch2_journal_reclaim(struct journal *j, bool direct, bool kicked)
|
|||||||
trace_and_count(c, journal_reclaim_start, c,
|
trace_and_count(c, journal_reclaim_start, c,
|
||||||
direct, kicked,
|
direct, kicked,
|
||||||
min_nr, min_key_cache,
|
min_nr, min_key_cache,
|
||||||
j->prereserved.reserved,
|
|
||||||
j->prereserved.remaining,
|
|
||||||
atomic_read(&c->btree_cache.dirty),
|
atomic_read(&c->btree_cache.dirty),
|
||||||
c->btree_cache.used,
|
c->btree_cache.used,
|
||||||
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
atomic_long_read(&c->btree_key_cache.nr_dirty),
|
||||||
|
@ -76,14 +76,6 @@ struct journal_res {
|
|||||||
u64 seq;
|
u64 seq;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* For reserving space in the journal prior to getting a reservation on a
|
|
||||||
* particular journal entry:
|
|
||||||
*/
|
|
||||||
struct journal_preres {
|
|
||||||
unsigned u64s;
|
|
||||||
};
|
|
||||||
|
|
||||||
union journal_res_state {
|
union journal_res_state {
|
||||||
struct {
|
struct {
|
||||||
atomic64_t counter;
|
atomic64_t counter;
|
||||||
@ -104,22 +96,6 @@ union journal_res_state {
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
union journal_preres_state {
|
|
||||||
struct {
|
|
||||||
atomic64_t counter;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct {
|
|
||||||
u64 v;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct {
|
|
||||||
u64 waiting:1,
|
|
||||||
reserved:31,
|
|
||||||
remaining:32;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
/* bytes: */
|
/* bytes: */
|
||||||
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
|
#define JOURNAL_ENTRY_SIZE_MIN (64U << 10) /* 64k */
|
||||||
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
|
#define JOURNAL_ENTRY_SIZE_MAX (4U << 20) /* 4M */
|
||||||
@ -180,8 +156,6 @@ struct journal {
|
|||||||
union journal_res_state reservations;
|
union journal_res_state reservations;
|
||||||
enum bch_watermark watermark;
|
enum bch_watermark watermark;
|
||||||
|
|
||||||
union journal_preres_state prereserved;
|
|
||||||
|
|
||||||
} __aligned(SMP_CACHE_BYTES);
|
} __aligned(SMP_CACHE_BYTES);
|
||||||
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
@ -196,10 +196,9 @@ DEFINE_EVENT(bio, journal_write,
|
|||||||
TRACE_EVENT(journal_reclaim_start,
|
TRACE_EVENT(journal_reclaim_start,
|
||||||
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
|
TP_PROTO(struct bch_fs *c, bool direct, bool kicked,
|
||||||
u64 min_nr, u64 min_key_cache,
|
u64 min_nr, u64 min_key_cache,
|
||||||
u64 prereserved, u64 prereserved_total,
|
|
||||||
u64 btree_cache_dirty, u64 btree_cache_total,
|
u64 btree_cache_dirty, u64 btree_cache_total,
|
||||||
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
|
u64 btree_key_cache_dirty, u64 btree_key_cache_total),
|
||||||
TP_ARGS(c, direct, kicked, min_nr, min_key_cache, prereserved, prereserved_total,
|
TP_ARGS(c, direct, kicked, min_nr, min_key_cache,
|
||||||
btree_cache_dirty, btree_cache_total,
|
btree_cache_dirty, btree_cache_total,
|
||||||
btree_key_cache_dirty, btree_key_cache_total),
|
btree_key_cache_dirty, btree_key_cache_total),
|
||||||
|
|
||||||
@ -209,8 +208,6 @@ TRACE_EVENT(journal_reclaim_start,
|
|||||||
__field(bool, kicked )
|
__field(bool, kicked )
|
||||||
__field(u64, min_nr )
|
__field(u64, min_nr )
|
||||||
__field(u64, min_key_cache )
|
__field(u64, min_key_cache )
|
||||||
__field(u64, prereserved )
|
|
||||||
__field(u64, prereserved_total )
|
|
||||||
__field(u64, btree_cache_dirty )
|
__field(u64, btree_cache_dirty )
|
||||||
__field(u64, btree_cache_total )
|
__field(u64, btree_cache_total )
|
||||||
__field(u64, btree_key_cache_dirty )
|
__field(u64, btree_key_cache_dirty )
|
||||||
@ -223,22 +220,18 @@ TRACE_EVENT(journal_reclaim_start,
|
|||||||
__entry->kicked = kicked;
|
__entry->kicked = kicked;
|
||||||
__entry->min_nr = min_nr;
|
__entry->min_nr = min_nr;
|
||||||
__entry->min_key_cache = min_key_cache;
|
__entry->min_key_cache = min_key_cache;
|
||||||
__entry->prereserved = prereserved;
|
|
||||||
__entry->prereserved_total = prereserved_total;
|
|
||||||
__entry->btree_cache_dirty = btree_cache_dirty;
|
__entry->btree_cache_dirty = btree_cache_dirty;
|
||||||
__entry->btree_cache_total = btree_cache_total;
|
__entry->btree_cache_total = btree_cache_total;
|
||||||
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
|
__entry->btree_key_cache_dirty = btree_key_cache_dirty;
|
||||||
__entry->btree_key_cache_total = btree_key_cache_total;
|
__entry->btree_key_cache_total = btree_key_cache_total;
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu prereserved %llu/%llu btree cache %llu/%llu key cache %llu/%llu",
|
TP_printk("%d,%d direct %u kicked %u min %llu key cache %llu btree cache %llu/%llu key cache %llu/%llu",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
__entry->direct,
|
__entry->direct,
|
||||||
__entry->kicked,
|
__entry->kicked,
|
||||||
__entry->min_nr,
|
__entry->min_nr,
|
||||||
__entry->min_key_cache,
|
__entry->min_key_cache,
|
||||||
__entry->prereserved,
|
|
||||||
__entry->prereserved_total,
|
|
||||||
__entry->btree_cache_dirty,
|
__entry->btree_cache_dirty,
|
||||||
__entry->btree_cache_total,
|
__entry->btree_cache_total,
|
||||||
__entry->btree_key_cache_dirty,
|
__entry->btree_key_cache_dirty,
|
||||||
|
Loading…
Reference in New Issue
Block a user