mirror of
https://github.com/torvalds/linux.git
synced 2024-11-29 23:51:37 +00:00
74d46992e0
This way we don't need a block_device structure to submit I/O. The block_device has different life time rules from the gendisk and request_queue and is usually only available when the block device node is open. Other callers need to explicitly create one (e.g. the lightnvm passthrough code, or the new nvme multipathing code). For the actual I/O path all that we need is the gendisk, which exists once per block device. But given that the block layer also does partition remapping we additionally need a partition index, which is used for said remapping in generic_make_request. Note that all the block drivers generally want request_queue or sometimes the gendisk, so this removes a layer of indirection all over the stack. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
382 lines
8.1 KiB
C
382 lines
8.1 KiB
C
/*
|
|
* Copyright (C) 2005-2007 Red Hat GmbH
|
|
*
|
|
* A target that delays reads and/or writes and can send
|
|
* them to different devices.
|
|
*
|
|
* This file is released under the GPL.
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/init.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include <linux/device-mapper.h>
|
|
|
|
#define DM_MSG_PREFIX "delay"
|
|
|
|
struct delay_c {
|
|
struct timer_list delay_timer;
|
|
struct mutex timer_lock;
|
|
struct workqueue_struct *kdelayd_wq;
|
|
struct work_struct flush_expired_bios;
|
|
struct list_head delayed_bios;
|
|
atomic_t may_delay;
|
|
|
|
struct dm_dev *dev_read;
|
|
sector_t start_read;
|
|
unsigned read_delay;
|
|
unsigned reads;
|
|
|
|
struct dm_dev *dev_write;
|
|
sector_t start_write;
|
|
unsigned write_delay;
|
|
unsigned writes;
|
|
};
|
|
|
|
struct dm_delay_info {
|
|
struct delay_c *context;
|
|
struct list_head list;
|
|
unsigned long expires;
|
|
};
|
|
|
|
static DEFINE_MUTEX(delayed_bios_lock);
|
|
|
|
static void handle_delayed_timer(unsigned long data)
|
|
{
|
|
struct delay_c *dc = (struct delay_c *)data;
|
|
|
|
queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
|
|
}
|
|
|
|
static void queue_timeout(struct delay_c *dc, unsigned long expires)
|
|
{
|
|
mutex_lock(&dc->timer_lock);
|
|
|
|
if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
|
|
mod_timer(&dc->delay_timer, expires);
|
|
|
|
mutex_unlock(&dc->timer_lock);
|
|
}
|
|
|
|
static void flush_bios(struct bio *bio)
|
|
{
|
|
struct bio *n;
|
|
|
|
while (bio) {
|
|
n = bio->bi_next;
|
|
bio->bi_next = NULL;
|
|
generic_make_request(bio);
|
|
bio = n;
|
|
}
|
|
}
|
|
|
|
static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
|
|
{
|
|
struct dm_delay_info *delayed, *next;
|
|
unsigned long next_expires = 0;
|
|
int start_timer = 0;
|
|
struct bio_list flush_bios = { };
|
|
|
|
mutex_lock(&delayed_bios_lock);
|
|
list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
|
|
if (flush_all || time_after_eq(jiffies, delayed->expires)) {
|
|
struct bio *bio = dm_bio_from_per_bio_data(delayed,
|
|
sizeof(struct dm_delay_info));
|
|
list_del(&delayed->list);
|
|
bio_list_add(&flush_bios, bio);
|
|
if ((bio_data_dir(bio) == WRITE))
|
|
delayed->context->writes--;
|
|
else
|
|
delayed->context->reads--;
|
|
continue;
|
|
}
|
|
|
|
if (!start_timer) {
|
|
start_timer = 1;
|
|
next_expires = delayed->expires;
|
|
} else
|
|
next_expires = min(next_expires, delayed->expires);
|
|
}
|
|
|
|
mutex_unlock(&delayed_bios_lock);
|
|
|
|
if (start_timer)
|
|
queue_timeout(dc, next_expires);
|
|
|
|
return bio_list_get(&flush_bios);
|
|
}
|
|
|
|
static void flush_expired_bios(struct work_struct *work)
|
|
{
|
|
struct delay_c *dc;
|
|
|
|
dc = container_of(work, struct delay_c, flush_expired_bios);
|
|
flush_bios(flush_delayed_bios(dc, 0));
|
|
}
|
|
|
|
/*
|
|
* Mapping parameters:
|
|
* <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
|
|
*
|
|
* With separate write parameters, the first set is only used for reads.
|
|
* Offsets are specified in sectors.
|
|
* Delays are specified in milliseconds.
|
|
*/
|
|
static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|
{
|
|
struct delay_c *dc;
|
|
unsigned long long tmpll;
|
|
char dummy;
|
|
int ret;
|
|
|
|
if (argc != 3 && argc != 6) {
|
|
ti->error = "Requires exactly 3 or 6 arguments";
|
|
return -EINVAL;
|
|
}
|
|
|
|
dc = kmalloc(sizeof(*dc), GFP_KERNEL);
|
|
if (!dc) {
|
|
ti->error = "Cannot allocate context";
|
|
return -ENOMEM;
|
|
}
|
|
|
|
dc->reads = dc->writes = 0;
|
|
|
|
ret = -EINVAL;
|
|
if (sscanf(argv[1], "%llu%c", &tmpll, &dummy) != 1) {
|
|
ti->error = "Invalid device sector";
|
|
goto bad;
|
|
}
|
|
dc->start_read = tmpll;
|
|
|
|
if (sscanf(argv[2], "%u%c", &dc->read_delay, &dummy) != 1) {
|
|
ti->error = "Invalid delay";
|
|
goto bad;
|
|
}
|
|
|
|
ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
|
|
&dc->dev_read);
|
|
if (ret) {
|
|
ti->error = "Device lookup failed";
|
|
goto bad;
|
|
}
|
|
|
|
ret = -EINVAL;
|
|
dc->dev_write = NULL;
|
|
if (argc == 3)
|
|
goto out;
|
|
|
|
if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1) {
|
|
ti->error = "Invalid write device sector";
|
|
goto bad_dev_read;
|
|
}
|
|
dc->start_write = tmpll;
|
|
|
|
if (sscanf(argv[5], "%u%c", &dc->write_delay, &dummy) != 1) {
|
|
ti->error = "Invalid write delay";
|
|
goto bad_dev_read;
|
|
}
|
|
|
|
ret = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table),
|
|
&dc->dev_write);
|
|
if (ret) {
|
|
ti->error = "Write device lookup failed";
|
|
goto bad_dev_read;
|
|
}
|
|
|
|
out:
|
|
ret = -EINVAL;
|
|
dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
|
|
if (!dc->kdelayd_wq) {
|
|
DMERR("Couldn't start kdelayd");
|
|
goto bad_queue;
|
|
}
|
|
|
|
setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
|
|
|
|
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
|
|
INIT_LIST_HEAD(&dc->delayed_bios);
|
|
mutex_init(&dc->timer_lock);
|
|
atomic_set(&dc->may_delay, 1);
|
|
|
|
ti->num_flush_bios = 1;
|
|
ti->num_discard_bios = 1;
|
|
ti->per_io_data_size = sizeof(struct dm_delay_info);
|
|
ti->private = dc;
|
|
return 0;
|
|
|
|
bad_queue:
|
|
if (dc->dev_write)
|
|
dm_put_device(ti, dc->dev_write);
|
|
bad_dev_read:
|
|
dm_put_device(ti, dc->dev_read);
|
|
bad:
|
|
kfree(dc);
|
|
return ret;
|
|
}
|
|
|
|
static void delay_dtr(struct dm_target *ti)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
|
|
destroy_workqueue(dc->kdelayd_wq);
|
|
|
|
dm_put_device(ti, dc->dev_read);
|
|
|
|
if (dc->dev_write)
|
|
dm_put_device(ti, dc->dev_write);
|
|
|
|
kfree(dc);
|
|
}
|
|
|
|
static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
|
|
{
|
|
struct dm_delay_info *delayed;
|
|
unsigned long expires = 0;
|
|
|
|
if (!delay || !atomic_read(&dc->may_delay))
|
|
return DM_MAPIO_REMAPPED;
|
|
|
|
delayed = dm_per_bio_data(bio, sizeof(struct dm_delay_info));
|
|
|
|
delayed->context = dc;
|
|
delayed->expires = expires = jiffies + msecs_to_jiffies(delay);
|
|
|
|
mutex_lock(&delayed_bios_lock);
|
|
|
|
if (bio_data_dir(bio) == WRITE)
|
|
dc->writes++;
|
|
else
|
|
dc->reads++;
|
|
|
|
list_add_tail(&delayed->list, &dc->delayed_bios);
|
|
|
|
mutex_unlock(&delayed_bios_lock);
|
|
|
|
queue_timeout(dc, expires);
|
|
|
|
return DM_MAPIO_SUBMITTED;
|
|
}
|
|
|
|
static void delay_presuspend(struct dm_target *ti)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
|
|
atomic_set(&dc->may_delay, 0);
|
|
del_timer_sync(&dc->delay_timer);
|
|
flush_bios(flush_delayed_bios(dc, 1));
|
|
}
|
|
|
|
static void delay_resume(struct dm_target *ti)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
|
|
atomic_set(&dc->may_delay, 1);
|
|
}
|
|
|
|
static int delay_map(struct dm_target *ti, struct bio *bio)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
|
|
if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
|
|
bio_set_dev(bio, dc->dev_write->bdev);
|
|
if (bio_sectors(bio))
|
|
bio->bi_iter.bi_sector = dc->start_write +
|
|
dm_target_offset(ti, bio->bi_iter.bi_sector);
|
|
|
|
return delay_bio(dc, dc->write_delay, bio);
|
|
}
|
|
|
|
bio_set_dev(bio, dc->dev_read->bdev);
|
|
bio->bi_iter.bi_sector = dc->start_read +
|
|
dm_target_offset(ti, bio->bi_iter.bi_sector);
|
|
|
|
return delay_bio(dc, dc->read_delay, bio);
|
|
}
|
|
|
|
static void delay_status(struct dm_target *ti, status_type_t type,
|
|
unsigned status_flags, char *result, unsigned maxlen)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
int sz = 0;
|
|
|
|
switch (type) {
|
|
case STATUSTYPE_INFO:
|
|
DMEMIT("%u %u", dc->reads, dc->writes);
|
|
break;
|
|
|
|
case STATUSTYPE_TABLE:
|
|
DMEMIT("%s %llu %u", dc->dev_read->name,
|
|
(unsigned long long) dc->start_read,
|
|
dc->read_delay);
|
|
if (dc->dev_write)
|
|
DMEMIT(" %s %llu %u", dc->dev_write->name,
|
|
(unsigned long long) dc->start_write,
|
|
dc->write_delay);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static int delay_iterate_devices(struct dm_target *ti,
|
|
iterate_devices_callout_fn fn, void *data)
|
|
{
|
|
struct delay_c *dc = ti->private;
|
|
int ret = 0;
|
|
|
|
ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data);
|
|
if (ret)
|
|
goto out;
|
|
|
|
if (dc->dev_write)
|
|
ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data);
|
|
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
static struct target_type delay_target = {
|
|
.name = "delay",
|
|
.version = {1, 2, 1},
|
|
.features = DM_TARGET_PASSES_INTEGRITY,
|
|
.module = THIS_MODULE,
|
|
.ctr = delay_ctr,
|
|
.dtr = delay_dtr,
|
|
.map = delay_map,
|
|
.presuspend = delay_presuspend,
|
|
.resume = delay_resume,
|
|
.status = delay_status,
|
|
.iterate_devices = delay_iterate_devices,
|
|
};
|
|
|
|
static int __init dm_delay_init(void)
|
|
{
|
|
int r;
|
|
|
|
r = dm_register_target(&delay_target);
|
|
if (r < 0) {
|
|
DMERR("register failed %d", r);
|
|
goto bad_register;
|
|
}
|
|
|
|
return 0;
|
|
|
|
bad_register:
|
|
return r;
|
|
}
|
|
|
|
static void __exit dm_delay_exit(void)
|
|
{
|
|
dm_unregister_target(&delay_target);
|
|
}
|
|
|
|
/* Module hooks */
|
|
module_init(dm_delay_init);
|
|
module_exit(dm_delay_exit);
|
|
|
|
MODULE_DESCRIPTION(DM_NAME " delay target");
|
|
MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
|
|
MODULE_LICENSE("GPL");
|