4246a0b63b
Currently we have two different ways to signal an I/O error on a BIO: (1) by clearing the BIO_UPTODATE flag (2) by returning a Linux errno value to the bi_end_io callback The first one has the drawback of only communicating a single possible error (-EIO), and the second one has the drawback of not beeing persistent when bios are queued up, and are not passed along from child to parent bio in the ever more popular chaining scenario. Having both mechanisms available has the additional drawback of utterly confusing driver authors and introducing bugs where various I/O submitters only deal with one of them, and the others have to add boilerplate code to deal with both kinds of error returns. So add a new bi_error field to store an errno value directly in struct bio and remove the existing mechanisms to clean all this up. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Hannes Reinecke <hare@suse.de> Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Jens Axboe <axboe@fb.com>
257 lines
5.5 KiB
C
257 lines
5.5 KiB
C
/*
|
|
* Moving/copying garbage collector
|
|
*
|
|
* Copyright 2012 Google, Inc.
|
|
*/
|
|
|
|
#include "bcache.h"
|
|
#include "btree.h"
|
|
#include "debug.h"
|
|
#include "request.h"
|
|
|
|
#include <trace/events/bcache.h>
|
|
|
|
struct moving_io {
|
|
struct closure cl;
|
|
struct keybuf_key *w;
|
|
struct data_insert_op op;
|
|
struct bbio bio;
|
|
};
|
|
|
|
static bool moving_pred(struct keybuf *buf, struct bkey *k)
|
|
{
|
|
struct cache_set *c = container_of(buf, struct cache_set,
|
|
moving_gc_keys);
|
|
unsigned i;
|
|
|
|
for (i = 0; i < KEY_PTRS(k); i++)
|
|
if (ptr_available(c, k, i) &&
|
|
GC_MOVE(PTR_BUCKET(c, k, i)))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Moving GC - IO loop */
|
|
|
|
static void moving_io_destructor(struct closure *cl)
|
|
{
|
|
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
|
kfree(io);
|
|
}
|
|
|
|
static void write_moving_finish(struct closure *cl)
|
|
{
|
|
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
|
struct bio *bio = &io->bio.bio;
|
|
struct bio_vec *bv;
|
|
int i;
|
|
|
|
bio_for_each_segment_all(bv, bio, i)
|
|
__free_page(bv->bv_page);
|
|
|
|
if (io->op.replace_collision)
|
|
trace_bcache_gc_copy_collision(&io->w->key);
|
|
|
|
bch_keybuf_del(&io->op.c->moving_gc_keys, io->w);
|
|
|
|
up(&io->op.c->moving_in_flight);
|
|
|
|
closure_return_with_destructor(cl, moving_io_destructor);
|
|
}
|
|
|
|
static void read_moving_endio(struct bio *bio)
|
|
{
|
|
struct bbio *b = container_of(bio, struct bbio, bio);
|
|
struct moving_io *io = container_of(bio->bi_private,
|
|
struct moving_io, cl);
|
|
|
|
if (bio->bi_error)
|
|
io->op.error = bio->bi_error;
|
|
else if (!KEY_DIRTY(&b->key) &&
|
|
ptr_stale(io->op.c, &b->key, 0)) {
|
|
io->op.error = -EINTR;
|
|
}
|
|
|
|
bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move");
|
|
}
|
|
|
|
static void moving_init(struct moving_io *io)
|
|
{
|
|
struct bio *bio = &io->bio.bio;
|
|
|
|
bio_init(bio);
|
|
bio_get(bio);
|
|
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
|
|
|
|
bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
|
|
bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
|
|
PAGE_SECTORS);
|
|
bio->bi_private = &io->cl;
|
|
bio->bi_io_vec = bio->bi_inline_vecs;
|
|
bch_bio_map(bio, NULL);
|
|
}
|
|
|
|
static void write_moving(struct closure *cl)
|
|
{
|
|
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
|
struct data_insert_op *op = &io->op;
|
|
|
|
if (!op->error) {
|
|
moving_init(io);
|
|
|
|
io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
|
|
op->write_prio = 1;
|
|
op->bio = &io->bio.bio;
|
|
|
|
op->writeback = KEY_DIRTY(&io->w->key);
|
|
op->csum = KEY_CSUM(&io->w->key);
|
|
|
|
bkey_copy(&op->replace_key, &io->w->key);
|
|
op->replace = true;
|
|
|
|
closure_call(&op->cl, bch_data_insert, NULL, cl);
|
|
}
|
|
|
|
continue_at(cl, write_moving_finish, op->wq);
|
|
}
|
|
|
|
static void read_moving_submit(struct closure *cl)
|
|
{
|
|
struct moving_io *io = container_of(cl, struct moving_io, cl);
|
|
struct bio *bio = &io->bio.bio;
|
|
|
|
bch_submit_bbio(bio, io->op.c, &io->w->key, 0);
|
|
|
|
continue_at(cl, write_moving, io->op.wq);
|
|
}
|
|
|
|
static void read_moving(struct cache_set *c)
|
|
{
|
|
struct keybuf_key *w;
|
|
struct moving_io *io;
|
|
struct bio *bio;
|
|
struct closure cl;
|
|
|
|
closure_init_stack(&cl);
|
|
|
|
/* XXX: if we error, background writeback could stall indefinitely */
|
|
|
|
while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
|
|
w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
|
|
&MAX_KEY, moving_pred);
|
|
if (!w)
|
|
break;
|
|
|
|
if (ptr_stale(c, &w->key, 0)) {
|
|
bch_keybuf_del(&c->moving_gc_keys, w);
|
|
continue;
|
|
}
|
|
|
|
io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
|
|
* DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
|
|
GFP_KERNEL);
|
|
if (!io)
|
|
goto err;
|
|
|
|
w->private = io;
|
|
io->w = w;
|
|
io->op.inode = KEY_INODE(&w->key);
|
|
io->op.c = c;
|
|
io->op.wq = c->moving_gc_wq;
|
|
|
|
moving_init(io);
|
|
bio = &io->bio.bio;
|
|
|
|
bio->bi_rw = READ;
|
|
bio->bi_end_io = read_moving_endio;
|
|
|
|
if (bio_alloc_pages(bio, GFP_KERNEL))
|
|
goto err;
|
|
|
|
trace_bcache_gc_copy(&w->key);
|
|
|
|
down(&c->moving_in_flight);
|
|
closure_call(&io->cl, read_moving_submit, NULL, &cl);
|
|
}
|
|
|
|
if (0) {
|
|
err: if (!IS_ERR_OR_NULL(w->private))
|
|
kfree(w->private);
|
|
|
|
bch_keybuf_del(&c->moving_gc_keys, w);
|
|
}
|
|
|
|
closure_sync(&cl);
|
|
}
|
|
|
|
static bool bucket_cmp(struct bucket *l, struct bucket *r)
|
|
{
|
|
return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
|
|
}
|
|
|
|
static unsigned bucket_heap_top(struct cache *ca)
|
|
{
|
|
struct bucket *b;
|
|
return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
|
|
}
|
|
|
|
void bch_moving_gc(struct cache_set *c)
|
|
{
|
|
struct cache *ca;
|
|
struct bucket *b;
|
|
unsigned i;
|
|
|
|
if (!c->copy_gc_enabled)
|
|
return;
|
|
|
|
mutex_lock(&c->bucket_lock);
|
|
|
|
for_each_cache(ca, c, i) {
|
|
unsigned sectors_to_move = 0;
|
|
unsigned reserve_sectors = ca->sb.bucket_size *
|
|
fifo_used(&ca->free[RESERVE_MOVINGGC]);
|
|
|
|
ca->heap.used = 0;
|
|
|
|
for_each_bucket(b, ca) {
|
|
if (GC_MARK(b) == GC_MARK_METADATA ||
|
|
!GC_SECTORS_USED(b) ||
|
|
GC_SECTORS_USED(b) == ca->sb.bucket_size ||
|
|
atomic_read(&b->pin))
|
|
continue;
|
|
|
|
if (!heap_full(&ca->heap)) {
|
|
sectors_to_move += GC_SECTORS_USED(b);
|
|
heap_add(&ca->heap, b, bucket_cmp);
|
|
} else if (bucket_cmp(b, heap_peek(&ca->heap))) {
|
|
sectors_to_move -= bucket_heap_top(ca);
|
|
sectors_to_move += GC_SECTORS_USED(b);
|
|
|
|
ca->heap.data[0] = b;
|
|
heap_sift(&ca->heap, 0, bucket_cmp);
|
|
}
|
|
}
|
|
|
|
while (sectors_to_move > reserve_sectors) {
|
|
heap_pop(&ca->heap, b, bucket_cmp);
|
|
sectors_to_move -= GC_SECTORS_USED(b);
|
|
}
|
|
|
|
while (heap_pop(&ca->heap, b, bucket_cmp))
|
|
SET_GC_MOVE(b, 1);
|
|
}
|
|
|
|
mutex_unlock(&c->bucket_lock);
|
|
|
|
c->moving_gc_keys.last_scanned = ZERO_KEY;
|
|
|
|
read_moving(c);
|
|
}
|
|
|
|
void bch_moving_init_cache_set(struct cache_set *c)
|
|
{
|
|
bch_keybuf_init(&c->moving_gc_keys);
|
|
sema_init(&c->moving_in_flight, 64);
|
|
}
|