bcache: Write out full stripes
Now that we're tracking dirty data per stripe, we can add two optimizations
for raid5/6:

 * If a stripe is already dirty, force writes to that stripe to writeback
   mode - to help build up full stripes of dirty data

 * When flushing dirty data, preferentially write out full stripes first if
   there are any.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
parent 279afbad4e
commit 72c270612b
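Before the diff, a compact, self-contained sketch of the write-path policy this patch introduces may help orientation. Everything in it is illustrative: struct stripe_map, stripe_dirty(), want_writeback() and the plain-array counters are simplified stand-ins for the bcache_dev_stripe_dirty()/should_writeback() helpers added to writeback.h below, and the cutoff values mirror the new CUTOFF_WRITEBACK / CUTOFF_WRITEBACK_SYNC definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Cache-utilization cutoffs (%), mirroring the new writeback.h values. */
#define CUTOFF_WRITEBACK        40
#define CUTOFF_WRITEBACK_SYNC   70

/* Simplified stand-in for the per-stripe dirty-sector counters. */
struct stripe_map {
        unsigned stripe_sectors;        /* sectors per raid5/6 stripe */
        unsigned *sectors_dirty;        /* one counter per stripe */
};

/* Does the write at [offset, offset + nr_sectors) touch a dirty stripe? */
static bool stripe_dirty(const struct stripe_map *m,
                         uint64_t offset, unsigned nr_sectors)
{
        uint64_t stripe = offset / m->stripe_sectors;

        while (1) {
                if (m->sectors_dirty[stripe])
                        return true;
                if (nr_sectors <= m->stripe_sectors)
                        return false;
                nr_sectors -= m->stripe_sectors;
                stripe++;
        }
}

/*
 * Write-path decision: once a stripe has any dirty data, keep writes to it in
 * writeback mode so a full stripe of dirty data builds up; otherwise fall
 * back to the usual sync/utilization heuristics.
 */
static bool want_writeback(const struct stripe_map *m, uint64_t offset,
                           unsigned nr_sectors, bool is_sync,
                           unsigned cache_in_use_pct)
{
        if (cache_in_use_pct > CUTOFF_WRITEBACK_SYNC)
                return false;           /* cache too full: write through */

        if (stripe_dirty(m, offset, nr_sectors))
                return true;            /* help complete the stripe */

        return is_sync || cache_in_use_pct <= CUTOFF_WRITEBACK;
}

int main(void)
{
        unsigned dirty[4] = { 0, 128, 0, 0 };   /* stripe 1 partially dirty */
        struct stripe_map m = { .stripe_sectors = 1024, .sectors_dirty = dirty };

        /* An async write landing in stripe 1 is steered into writeback mode. */
        printf("%d\n", want_writeback(&m, 1024 + 8, 16, false, 55));
        return 0;
}

The essential difference from the old request.c helper is the early "does this write touch an already-dirty stripe?" check, which is what steers partial-stripe writes into writeback mode so full stripes of dirty data can accumulate and be flushed whole.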
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -387,8 +387,6 @@ struct keybuf_key {
 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
 
 struct keybuf {
-        keybuf_pred_fn *key_predicate;
-
         struct bkey last_scanned;
         spinlock_t lock;
 
@@ -532,6 +530,7 @@ struct cached_dev {
         unsigned sequential_merge:1;
         unsigned verify:1;
 
+        unsigned partial_stripes_expensive:1;
         unsigned writeback_metadata:1;
         unsigned writeback_running:1;
         unsigned char writeback_percent;
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -2252,7 +2252,8 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 }
 
 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
-                                   struct keybuf *buf, struct bkey *end)
+                                   struct keybuf *buf, struct bkey *end,
+                                   keybuf_pred_fn *pred)
 {
         struct btree_iter iter;
         bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2271,7 +2272,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
                 if (bkey_cmp(&buf->last_scanned, end) >= 0)
                         break;
 
-                if (buf->key_predicate(buf, k)) {
+                if (pred(buf, k)) {
                         struct keybuf_key *w;
 
                         spin_lock(&buf->lock);
@@ -2290,7 +2291,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
                 if (!k)
                         break;
 
-                btree(refill_keybuf, k, b, op, buf, end);
+                btree(refill_keybuf, k, b, op, buf, end, pred);
                 /*
                  * Might get an error here, but can't really do anything
                  * and it'll get logged elsewhere. Just read what we
@@ -2308,7 +2309,7 @@ static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
 }
 
 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
-                       struct bkey *end)
+                       struct bkey *end, keybuf_pred_fn *pred)
 {
         struct bkey start = buf->last_scanned;
         struct btree_op op;
@@ -2316,7 +2317,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
         cond_resched();
 
-        btree_root(refill_keybuf, c, &op, buf, end);
+        btree_root(refill_keybuf, c, &op, buf, end, pred);
         closure_sync(&op.cl);
 
         pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2402,7 +2403,8 @@ struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
                                           struct keybuf *buf,
-                                          struct bkey *end)
+                                          struct bkey *end,
+                                          keybuf_pred_fn *pred)
 {
         struct keybuf_key *ret;
 
@@ -2416,15 +2418,14 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
                         break;
                 }
 
-                bch_refill_keybuf(c, buf, end);
+                bch_refill_keybuf(c, buf, end, pred);
         }
 
         return ret;
 }
 
-void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+void bch_keybuf_init(struct keybuf *buf)
 {
-        buf->key_predicate = fn;
         buf->last_scanned = MAX_KEY;
         buf->keys = RB_ROOT;
 
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -391,13 +391,14 @@ void bch_moving_gc(struct closure *);
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
-void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
+void bch_keybuf_init(struct keybuf *);
+void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
+                       keybuf_pred_fn *);
 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
                                   struct bkey *);
 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
-                                          struct keybuf *, struct bkey *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
+                                          struct bkey *, keybuf_pred_fn *);
 
 #endif
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -357,7 +357,7 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
                 if (i->bytes)
                         break;
 
-                w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
+                w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
                 if (!w)
                         break;
 
@@ -380,7 +380,7 @@ static int bch_dump_open(struct inode *inode, struct file *file)
 
         file->private_data = i;
         i->c = c;
-        bch_keybuf_init(&i->keys, dump_pred);
+        bch_keybuf_init(&i->keys);
         i->keys.last_scanned = KEY(0, 0, 0);
 
         return 0;
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -136,7 +136,8 @@ static void read_moving(struct closure *cl)
         /* XXX: if we error, background writeback could stall indefinitely */
 
         while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
-                w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
+                w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
+                                           &MAX_KEY, moving_pred);
                 if (!w)
                         break;
 
@@ -248,5 +249,5 @@ void bch_moving_gc(struct closure *cl)
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
-        bch_keybuf_init(&c->moving_gc_keys, moving_pred);
+        bch_keybuf_init(&c->moving_gc_keys);
 }
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -22,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD 95
 #define CUTOFF_CACHE_READA 90
-#define CUTOFF_WRITEBACK 50
-#define CUTOFF_WRITEBACK_SYNC 75
 
 struct kmem_cache *bch_search_cache;
 
@@ -998,17 +996,6 @@ static void cached_dev_write_complete(struct closure *cl)
         cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-        unsigned threshold = (bio->bi_rw & REQ_SYNC)
-                ? CUTOFF_WRITEBACK_SYNC
-                : CUTOFF_WRITEBACK;
-
-        return !atomic_read(&dc->disk.detaching) &&
-                cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-                dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
         struct closure *cl = &s->cl;
@@ -1030,12 +1017,16 @@ static void request_write(struct cached_dev *dc, struct search *s)
         if (bio->bi_rw & REQ_DISCARD)
                 goto skip;
 
+        if (should_writeback(dc, s->orig_bio,
+                             cache_mode(dc, bio),
+                             s->op.skip)) {
+                s->op.skip = false;
+                s->writeback = true;
+        }
+
         if (s->op.skip)
                 goto skip;
 
-        if (should_writeback(dc, s->orig_bio))
-                s->writeback = true;
-
         trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
         if (!s->writeback) {
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -81,6 +81,9 @@ rw_attribute(writeback_rate_p_term_inverse);
 rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
+read_attribute(stripe_size);
+read_attribute(partial_stripes_expensive);
+
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
@@ -147,6 +150,9 @@ SHOW(__bch_cached_dev)
         sysfs_hprint(dirty_data,
                      bcache_dev_sectors_dirty(&dc->disk) << 9);
 
+        sysfs_hprint(stripe_size, (1 << dc->disk.stripe_size_bits) << 9);
+        var_printf(partial_stripes_expensive, "%u");
+
         var_printf(sequential_merge, "%i");
         var_hprint(sequential_cutoff);
         var_hprint(readahead);
@@ -286,6 +292,8 @@ static struct attribute *bch_cached_dev_files[] = {
         &sysfs_writeback_rate_d_smooth,
         &sysfs_writeback_rate_debug,
         &sysfs_dirty_data,
+        &sysfs_stripe_size,
+        &sysfs_partial_stripes_expensive,
         &sysfs_sequential_cutoff,
         &sysfs_sequential_merge,
         &sysfs_clear_stats,
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -108,6 +108,31 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
         return KEY_DIRTY(k);
 }
 
+static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
+{
+        uint64_t stripe;
+        unsigned nr_sectors = KEY_SIZE(k);
+        struct cached_dev *dc = container_of(buf, struct cached_dev,
+                                             writeback_keys);
+        unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
+
+        if (!KEY_DIRTY(k))
+                return false;
+
+        stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
+        while (1) {
+                if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
+                    stripe_size)
+                        return false;
+
+                if (nr_sectors <= stripe_size)
+                        return true;
+
+                nr_sectors -= stripe_size;
+                stripe++;
+        }
+}
+
 static void dirty_init(struct keybuf_key *w)
 {
         struct dirty_io *io = w->private;
@@ -152,7 +177,22 @@ static void refill_dirty(struct closure *cl)
                 searched_from_start = true;
         }
 
-        bch_refill_keybuf(dc->disk.c, buf, &end);
+        if (dc->partial_stripes_expensive) {
+                uint64_t i;
+
+                for (i = 0; i < dc->disk.nr_stripes; i++)
+                        if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
+                            1 << dc->disk.stripe_size_bits)
+                                goto full_stripes;
+
+                goto normal_refill;
+full_stripes:
+                bch_refill_keybuf(dc->disk.c, buf, &end,
+                                  dirty_full_stripe_pred);
+        } else {
+normal_refill:
+                bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
+        }
 
         if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
                 /* Searched the entire btree - delay awhile */
@@ -446,7 +486,7 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
         closure_init_unlocked(&dc->writeback);
         init_rwsem(&dc->writeback_lock);
 
-        bch_keybuf_init(&dc->writeback_keys, dirty_pred);
+        bch_keybuf_init(&dc->writeback_keys);
 
         dc->writeback_metadata = true;
         dc->writeback_running = true;
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -1,6 +1,9 @@
 #ifndef _BCACHE_WRITEBACK_H
 #define _BCACHE_WRITEBACK_H
 
+#define CUTOFF_WRITEBACK 40
+#define CUTOFF_WRITEBACK_SYNC 70
+
 static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
 {
         uint64_t i, ret = 0;
@@ -11,6 +14,46 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
         return ret;
 }
 
+static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
+                                           uint64_t offset,
+                                           unsigned nr_sectors)
+{
+        uint64_t stripe = offset >> d->stripe_size_bits;
+
+        while (1) {
+                if (atomic_read(d->stripe_sectors_dirty + stripe))
+                        return true;
+
+                if (nr_sectors <= 1 << d->stripe_size_bits)
+                        return false;
+
+                nr_sectors -= 1 << d->stripe_size_bits;
+                stripe++;
+        }
+}
+
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+                                    unsigned cache_mode, bool would_skip)
+{
+        unsigned in_use = dc->disk.c->gc_stats.in_use;
+
+        if (cache_mode != CACHE_MODE_WRITEBACK ||
+            atomic_read(&dc->disk.detaching) ||
+            in_use > CUTOFF_WRITEBACK_SYNC)
+                return false;
+
+        if (dc->partial_stripes_expensive &&
+            bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
+                                    bio_sectors(bio)))
+                return true;
+
+        if (would_skip)
+                return false;
+
+        return bio->bi_rw & REQ_SYNC ||
+                in_use <= CUTOFF_WRITEBACK;
+}
+
 void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
 void bch_writeback_queue(struct cached_dev *);
 void bch_writeback_add(struct cached_dev *);