dm io: remove extra bi_io_vec region hack

Remove the hack where we allocate an extra bi_io_vec to store additional
private data.  This hack prevents us from supporting barriers in
dm-raid1 without first making another little block layer change.
Instead of doing that, this patch eliminates the bi_io_vec abuse by
storing the region number directly in the low bits of bi_private.

We need to store two things for each bio, the pointer to the main io
structure and, if parallel writes were requested, an index indicating
which of these writes this bio belongs to.  There can be at most
BITS_PER_LONG regions - 32 or 64.

The index (region number) was stored in the last (hidden) bio vector and
the pointer to struct io was stored in bi_private.

This patch now aligns "struct io" on BITS_PER_LONG bytes and stores the
region number in the low BITS_PER_LONG bits of bi_private.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
This commit is contained in:
Mikulas Patocka 2009-12-10 23:51:58 +00:00 committed by Alasdair G Kergon
parent 952b355760
commit f1e5398746

View File

@ -16,12 +16,19 @@
#include <linux/slab.h>
#include <linux/dm-io.h>
#define DM_MSG_PREFIX "io"
#define DM_IO_MAX_REGIONS BITS_PER_LONG
struct dm_io_client {
mempool_t *pool;
struct bio_set *bios;
};
/* FIXME: can we shrink this ? */
/*
* Aligning 'struct io' reduces the number of bits required to store
* its address. Refer to store_io_and_region_in_bio() below.
*/
struct io {
unsigned long error_bits;
unsigned long eopnotsupp_bits;
@ -30,7 +37,7 @@ struct io {
struct dm_io_client *client;
io_notify_fn callback;
void *context;
};
} __attribute__((aligned(DM_IO_MAX_REGIONS)));
static struct kmem_cache *_dm_io_cache;
@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
/*-----------------------------------------------------------------
* We need to keep track of which region a bio is doing io for.
* In order to save a memory allocation we store this the last
* bvec which we know is unused (blech).
* XXX This is ugly and can OOPS with some configs... find another way.
* To avoid a memory allocation to store just 5 or 6 bits, we
* ensure the 'struct io' pointer is aligned so enough low bits are
* always zero and then combine it with the region number directly in
* bi_private.
*---------------------------------------------------------------*/
static inline void bio_set_region(struct bio *bio, unsigned region)
static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
unsigned region)
{
bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
DMCRIT("Unaligned struct io pointer %p", io);
BUG();
}
bio->bi_private = (void *)((unsigned long)io | region);
}
static inline unsigned bio_get_region(struct bio *bio)
static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
unsigned *region)
{
return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
unsigned long val = (unsigned long)bio->bi_private;
*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
*region = val & (DM_IO_MAX_REGIONS - 1);
}
/*-----------------------------------------------------------------
@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error)
/*
* The bio destructor in bio_put() may use the io object.
*/
io = bio->bi_private;
region = bio_get_region(bio);
retrieve_io_and_region_from_bio(bio, &io, &region);
bio->bi_max_vecs++;
bio_put(bio);
dec_count(io, region, error);
@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
static void dm_bio_destructor(struct bio *bio)
{
struct io *io = bio->bi_private;
unsigned region;
struct io *io;
retrieve_io_and_region_from_bio(bio, &io, &region);
bio_free(bio, io->client->bios);
}
@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
while (remaining) {
/*
* Allocate a suitably sized-bio: we add an extra
* bvec for bio_get/set_region() and decrement bi_max_vecs
* to hide it from bio_add_page().
* Allocate a suitably sized-bio.
*/
num_bvecs = dm_sector_div_up(remaining,
(PAGE_SIZE >> SECTOR_SHIFT));
num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
num_bvecs);
if (unlikely(num_bvecs > BIO_MAX_PAGES))
num_bvecs = BIO_MAX_PAGES;
num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
bio->bi_private = io;
bio->bi_destructor = dm_bio_destructor;
bio->bi_max_vecs--;
bio_set_region(bio, region);
store_io_and_region_in_bio(bio, io, region);
/*
* Try and add as many pages as possible.
@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
int i;
struct dpages old_pages = *dp;
BUG_ON(num_regions > DM_IO_MAX_REGIONS);
if (sync)
rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
struct io io;
/*
* gcc <= 4.3 can't do the alignment for stack variables, so we must
* align it on our own.
* volatile prevents the optimizer from removing or reusing
* "io_" field from the stack frame (allowed in ANSI C).
*/
volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
}
retry:
io.error_bits = 0;
io.eopnotsupp_bits = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
io.client = client;
io->error_bits = 0;
io->eopnotsupp_bits = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = current;
io->client = client;
dispatch_io(rw, num_regions, where, dp, &io, 1);
dispatch_io(rw, num_regions, where, dp, io, 1);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!atomic_read(&io.count))
if (!atomic_read(&io->count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
rw &= ~(1 << BIO_RW_BARRIER);
goto retry;
}
if (error_bits)
*error_bits = io.error_bits;
*error_bits = io->error_bits;
return io.error_bits ? -EIO : 0;
return io->error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,