linux/drivers/md/bcache/movinggc.c
Kent Overstreet 78365411b3 bcache: Rework allocator reserves
We need a reserve for allocating buckets for new btree nodes - and now that
we've got multiple btrees, it really needs to be per btree.

This reworks the reserves so we've got separate freelists for each reserve
instead of watermarks, which seems to make things a bit cleaner, and it adds
some code so that btree_split() can make sure the reserve is available before it
starts.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
2014-01-08 13:05:09 -08:00

255 lines
5.3 KiB
C

/*
* Moving/copying garbage collector
*
* Copyright 2012 Google, Inc.
*/
#include "bcache.h"
#include "btree.h"
#include "debug.h"
#include "request.h"
#include <trace/events/bcache.h>
/*
 * Per-key state for one moving-GC copy: the key's data is read into the
 * embedded bio and then rewritten through bch_data_insert().
 */
struct moving_io {
/* Closure driving the read -> write -> finish steps; bio->bi_private points at it. */
struct closure cl;
/* Key taken from the moving_gc_keys keybuf that this I/O is copying. */
struct keybuf_key *w;
/* Insert operation used for the write half; op.error also carries the read status. */
struct data_insert_op op;
/*
 * Bio used for both the read and the write. read_moving() allocates the
 * bio_vecs directly behind this struct, so this member presumably has to
 * stay last -- confirm against the layout of struct bbio.
 */
struct bbio bio;
};
/*
 * Keybuf predicate used when scanning for keys to move.
 *
 * @buf is the moving_gc_keys keybuf embedded in a cache_set. Returns true if
 * at least one pointer of @k refers to a bucket flagged with GC_MOVE, false
 * otherwise.
 *
 * NOTE(review): PTR_BUCKET() is applied to every pointer without first
 * checking that the pointer's cache device is present -- confirm that keys
 * with unavailable pointers can never reach this predicate.
 */
static bool moving_pred(struct keybuf *buf, struct bkey *k)
{
	struct cache_set *c = container_of(buf, struct cache_set,
					   moving_gc_keys);
	unsigned ptr;

	for (ptr = 0; ptr < KEY_PTRS(k); ptr++)
		if (GC_MOVE(PTR_BUCKET(c, k, ptr)))
			return true;

	return false;
}
/* Moving GC - IO loop */
/*
 * Closure destructor: releases the moving_io that embeds @cl. Handed to
 * closure_return_with_destructor() by write_moving_finish().
 */
static void moving_io_destructor(struct closure *cl)
{
	kfree(container_of(cl, struct moving_io, cl));
}
/*
 * Final step of a moving-GC copy, scheduled by write_moving().
 *
 * Frees the data pages attached to the bio, emits a trace event if the
 * insert reported a replace collision, removes the key from the
 * moving_gc_keys keybuf, releases the in-flight slot taken in read_moving()
 * and finally returns from the closure, freeing the moving_io through
 * moving_io_destructor().
 */
static void write_moving_finish(struct closure *cl)
{
	struct moving_io *mio = container_of(cl, struct moving_io, cl);
	struct bio *data_bio = &mio->bio.bio;
	struct bio_vec *seg;
	int idx;

	/* Pages were allocated in read_moving(); nothing else owns them now. */
	bio_for_each_segment_all(seg, data_bio, idx)
		__free_page(seg->bv_page);

	if (mio->op.replace_collision)
		trace_bcache_gc_copy_collision(&mio->w->key);

	bch_keybuf_del(&mio->op.c->moving_gc_keys, mio->w);

	/* Pairs with the down() in read_moving(). */
	up(&mio->op.c->moving_in_flight);

	closure_return_with_destructor(cl, moving_io_destructor);
}
/*
 * Completion handler for the read half of a moving-GC copy.
 *
 * Records the outcome in io->op.error so that write_moving() can skip the
 * write: either the I/O error itself, or -EINTR when the read succeeded but
 * the key is clean and its first pointer has gone stale in the meantime.
 * Completion is then passed on to bch_bbio_endio().
 */
static void read_moving_endio(struct bio *bio, int error)
{
	struct moving_io *io = container_of(bio->bi_private,
					    struct moving_io, cl);
	struct bbio *b = container_of(bio, struct bbio, bio);

	if (error) {
		io->op.error = error;
	} else if (!KEY_DIRTY(&b->key)) {
		/* Only clean data is checked for staleness here. */
		if (ptr_stale(io->op.c, &b->key, 0))
			io->op.error = -EINTR;
	}

	bch_bbio_endio(io->op.c, bio, error, "reading data to move");
}
/*
 * (Re)initialise the bio embedded in @io so that it covers the whole key
 * io->w->key. Called once before the read and again before the write.
 *
 * The bio gets idle I/O priority, uses its inline vecs, has io->cl as its
 * private data and is sized from KEY_SIZE (shifted by 9, i.e. sectors to
 * bytes assuming 512-byte sectors). bch_bio_map() is called last, after the
 * size and vec fields have been filled in.
 */
static void moving_init(struct moving_io *io)
{
	struct bio *bio = &io->bio.bio;
	unsigned sectors = KEY_SIZE(&io->w->key);

	bio_init(bio);
	bio_get(bio);
	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));

	bio->bi_private		= &io->cl;
	bio->bi_io_vec		= bio->bi_inline_vecs;
	bio->bi_max_vecs	= DIV_ROUND_UP(sectors, PAGE_SECTORS);
	bio->bi_iter.bi_size	= sectors << 9;

	bch_bio_map(bio, NULL);
}
static void write_moving(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
struct data_insert_op *op = &io->op;
if (!op->error) {
moving_init(io);
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
op->write_prio = 1;
op->bio = &io->bio.bio;
op->writeback = KEY_DIRTY(&io->w->key);
op->csum = KEY_CSUM(&io->w->key);
bkey_copy(&op->replace_key, &io->w->key);
op->replace = true;
closure_call(&op->cl, bch_data_insert, NULL, cl);
}
continue_at(cl, write_moving_finish, system_wq);
}
/*
 * Read half of a moving-GC copy: submits the prepared bio against pointer 0
 * of the key, then continues in write_moving() on system_wq.
 */
static void read_moving_submit(struct closure *cl)
{
	struct moving_io *io = container_of(cl, struct moving_io, cl);

	bch_submit_bbio(&io->bio.bio, io->op.c, &io->w->key, 0);

	continue_at(cl, write_moving, system_wq);
}
/*
 * Main loop of the moving GC: walks the keys selected by moving_pred() and
 * starts one read -> write copy per key.
 *
 * For every key returned by bch_keybuf_next_rescan() whose first pointer is
 * not stale, a moving_io with room for the key's bio_vecs is allocated, data
 * pages are attached, an in-flight slot is taken from c->moving_in_flight
 * and the copy is started with the on-stack closure as parent.
 *
 * The loop ends when the cache set is stopping, when no key is left, or on
 * the first allocation failure. In every case the function waits for all
 * copies it started before returning.
 */
static void read_moving(struct cache_set *c)
{
	struct keybuf_key *w;
	struct moving_io *io;
	struct bio *bio;
	struct closure cl;

	closure_init_stack(&cl);

	/* XXX: if we error, background writeback could stall indefinitely */

	for (;;) {
		if (test_bit(CACHE_SET_STOPPING, &c->flags))
			break;

		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
					   &MAX_KEY, moving_pred);
		if (!w)
			break;

		/* Data already invalidated: nothing to copy, just drop the key. */
		if (ptr_stale(c, &w->key, 0)) {
			bch_keybuf_del(&c->moving_gc_keys, w);
			continue;
		}

		/* The bio_vecs for the embedded bio live right behind the struct. */
		io = kzalloc(sizeof(*io) + sizeof(struct bio_vec) *
			     DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
			     GFP_KERNEL);
		if (!io)
			goto err;

		w->private	= io;
		io->w		= w;
		io->op.inode	= KEY_INODE(&w->key);
		io->op.c	= c;

		moving_init(io);
		bio = &io->bio.bio;

		bio->bi_rw	= READ;
		bio->bi_end_io	= read_moving_endio;

		if (bio_alloc_pages(bio, GFP_KERNEL))
			goto err;

		trace_bcache_gc_copy(&w->key);

		/* Limits the number of copies in flight; released in write_moving_finish(). */
		down(&c->moving_in_flight);
		closure_call(&io->cl, read_moving_submit, NULL, &cl);
	}

	goto out;

err:
	/*
	 * w->private is only a real allocation once a moving_io has been
	 * attached above; the check skips NULL and error-pointer values
	 * (presumably whatever the keybuf code left there -- confirm).
	 */
	if (!IS_ERR_OR_NULL(w->private))
		kfree(w->private);

	bch_keybuf_del(&c->moving_gc_keys, w);
out:
	closure_sync(&cl);
}
/*
 * Heap comparison callback: true when @l has fewer GC-accounted sectors in
 * use than @r.
 */
static bool bucket_cmp(struct bucket *l, struct bucket *r)
{
	return GC_SECTORS_USED(r) > GC_SECTORS_USED(l);
}
/*
 * Returns GC_SECTORS_USED() of the bucket at the top of @ca's heap, or 0 if
 * heap_peek() yields no bucket.
 */
static unsigned bucket_heap_top(struct cache *ca)
{
	struct bucket *top = heap_peek(&ca->heap);

	if (!top)
		return 0;

	return GC_SECTORS_USED(top);
}
/*
 * Entry point of the moving (copying) garbage collector.
 *
 * Does nothing unless c->copy_gc_enabled is set. Otherwise, under
 * c->bucket_lock and for every cache in the set:
 *
 *  1. Collects buckets with a non-zero GC_SECTORS_USED() into ca->heap.
 *     Once the heap is full, a bucket only gets in by displacing the top
 *     entry, and only if it uses fewer sectors than that entry -- so the
 *     heap ends up holding the least-used buckets seen.
 *  2. Pops buckets until the sectors to move no longer exceed what the
 *     RESERVE_MOVINGGC freelist can hold (free buckets * bucket size).
 *  3. Flags every bucket still in the heap with GC_MOVE.
 *
 * After dropping the lock, the keybuf scan position is reset to ZERO_KEY and
 * read_moving() copies the data out of the flagged buckets.
 */
void bch_moving_gc(struct cache_set *c)
{
	struct cache *ca;
	struct bucket *b;
	unsigned i;

	if (!c->copy_gc_enabled)
		return;

	mutex_lock(&c->bucket_lock);

	for_each_cache(ca, c, i) {
		unsigned pending = 0;
		unsigned budget = fifo_used(&ca->free[RESERVE_MOVINGGC]) *
			ca->sb.bucket_size;

		/* Start from an empty heap for this cache. */
		ca->heap.used = 0;

		for_each_bucket(b, ca) {
			if (!GC_SECTORS_USED(b))
				continue;

			if (heap_full(&ca->heap)) {
				/* Only displace the top with a less-used bucket. */
				if (!bucket_cmp(b, heap_peek(&ca->heap)))
					continue;

				pending -= bucket_heap_top(ca);
				pending += GC_SECTORS_USED(b);

				ca->heap.data[0] = b;
				heap_sift(&ca->heap, 0, bucket_cmp);
			} else {
				pending += GC_SECTORS_USED(b);
				heap_add(&ca->heap, b, bucket_cmp);
			}
		}

		/* Trim the candidate set down to what the reserve can absorb. */
		while (pending > budget) {
			heap_pop(&ca->heap, b, bucket_cmp);
			pending -= GC_SECTORS_USED(b);
		}

		/* Whatever is left gets moved. */
		while (heap_pop(&ca->heap, b, bucket_cmp))
			SET_GC_MOVE(b, 1);
	}

	mutex_unlock(&c->bucket_lock);

	c->moving_gc_keys.last_scanned = ZERO_KEY;

	read_moving(c);
}
/*
 * Sets up the moving-GC state of a cache set: the semaphore that caps the
 * number of copies in flight at 64 (taken in read_moving(), released in
 * write_moving_finish()) and the keybuf holding the keys to move.
 */
void bch_moving_init_cache_set(struct cache_set *c)
{
	sema_init(&c->moving_in_flight, 64);
	bch_keybuf_init(&c->moving_gc_keys);
}