Merge branch 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.6/drivers
Pull MD changes from Song.

* 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid1: introduce wait_for_serialization
  md/raid1: use bucket based mechanism for IO serialization
  md: introduce a new struct for IO serialization
  md: don't destroy serial_info_pool if serialize_policy is true
  raid1: serialize the overlap write
  md: reorgnize mddev_create/destroy_serial_pool
  md: add serialize_policy sysfs node for raid1
  md: prepare for enable raid1 io serialization
  md: fix a typo s/creat/create
  md: rename wb stuffs
  raid5: remove worker_cnt_per_group argument from alloc_thread_groups
  md/raid6: fix algorithm choice under larger PAGE_SIZE
  raid6/test: fix a compilation warning
  raid6/test: fix a compilation error
  md-bitmap: small cleanups
This commit is contained in:
commit
7454049eb7
@ -1019,8 +1019,6 @@ void md_bitmap_unplug(struct bitmap *bitmap)
|
||||
/* look at each page to see if there are any set bits that need to be
|
||||
* flushed out to disk */
|
||||
for (i = 0; i < bitmap->storage.file_pages; i++) {
|
||||
if (!bitmap->storage.filemap)
|
||||
return;
|
||||
dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
|
||||
need_write = test_and_clear_page_attr(bitmap, i,
|
||||
BITMAP_PAGE_NEEDWRITE);
|
||||
@ -1338,7 +1336,8 @@ void md_bitmap_daemon_work(struct mddev *mddev)
|
||||
BITMAP_PAGE_DIRTY))
|
||||
/* bitmap_unplug will handle the rest */
|
||||
break;
|
||||
if (test_and_clear_page_attr(bitmap, j,
|
||||
if (bitmap->storage.filemap &&
|
||||
test_and_clear_page_attr(bitmap, j,
|
||||
BITMAP_PAGE_NEEDWRITE)) {
|
||||
write_page(bitmap, bitmap->storage.filemap[j], 0);
|
||||
}
|
||||
@ -1790,8 +1789,8 @@ void md_bitmap_destroy(struct mddev *mddev)
|
||||
return;
|
||||
|
||||
md_bitmap_wait_behind_writes(mddev);
|
||||
mempool_destroy(mddev->wb_info_pool);
|
||||
mddev->wb_info_pool = NULL;
|
||||
if (!mddev->serialize_policy)
|
||||
mddev_destroy_serial_pool(mddev, NULL, true);
|
||||
|
||||
mutex_lock(&mddev->bitmap_info.mutex);
|
||||
spin_lock(&mddev->lock);
|
||||
@ -1908,7 +1907,7 @@ int md_bitmap_load(struct mddev *mddev)
|
||||
goto out;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
mddev_create_wb_pool(mddev, rdev, true);
|
||||
mddev_create_serial_pool(mddev, rdev, true);
|
||||
|
||||
if (mddev_is_clustered(mddev))
|
||||
md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes);
|
||||
@ -2475,16 +2474,16 @@ backlog_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
if (backlog > COUNTER_MAX)
|
||||
return -EINVAL;
|
||||
mddev->bitmap_info.max_write_behind = backlog;
|
||||
if (!backlog && mddev->wb_info_pool) {
|
||||
/* wb_info_pool is not needed if backlog is zero */
|
||||
mempool_destroy(mddev->wb_info_pool);
|
||||
mddev->wb_info_pool = NULL;
|
||||
} else if (backlog && !mddev->wb_info_pool) {
|
||||
/* wb_info_pool is needed since backlog is not zero */
|
||||
if (!backlog && mddev->serial_info_pool) {
|
||||
/* serial_info_pool is not needed if backlog is zero */
|
||||
if (!mddev->serialize_policy)
|
||||
mddev_destroy_serial_pool(mddev, NULL, false);
|
||||
} else if (backlog && !mddev->serial_info_pool) {
|
||||
/* serial_info_pool is needed since backlog is not zero */
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
mddev_create_wb_pool(mddev, rdev, false);
|
||||
mddev_create_serial_pool(mddev, rdev, false);
|
||||
}
|
||||
if (old_mwb != backlog)
|
||||
md_bitmap_update_sb(mddev->bitmap);
|
||||
|
272
drivers/md/md.c
272
drivers/md/md.c
@ -125,76 +125,167 @@ static inline int speed_max(struct mddev *mddev)
|
||||
mddev->sync_speed_max : sysctl_speed_limit_max;
|
||||
}
|
||||
|
||||
static int rdev_init_wb(struct md_rdev *rdev)
|
||||
static void rdev_uninit_serial(struct md_rdev *rdev)
|
||||
{
|
||||
if (rdev->bdev->bd_queue->nr_hw_queues == 1)
|
||||
if (!test_and_clear_bit(CollisionCheck, &rdev->flags))
|
||||
return;
|
||||
|
||||
kvfree(rdev->serial);
|
||||
rdev->serial = NULL;
|
||||
}
|
||||
|
||||
static void rdevs_uninit_serial(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
rdev_uninit_serial(rdev);
|
||||
}
|
||||
|
||||
static int rdev_init_serial(struct md_rdev *rdev)
|
||||
{
|
||||
/* serial_nums equals BARRIER_BUCKETS_NR */
|
||||
int i, serial_nums = 1 << ((PAGE_SHIFT - ilog2(sizeof(atomic_t))));
|
||||
struct serial_in_rdev *serial = NULL;
|
||||
|
||||
if (test_bit(CollisionCheck, &rdev->flags))
|
||||
return 0;
|
||||
|
||||
spin_lock_init(&rdev->wb_list_lock);
|
||||
INIT_LIST_HEAD(&rdev->wb_list);
|
||||
init_waitqueue_head(&rdev->wb_io_wait);
|
||||
set_bit(WBCollisionCheck, &rdev->flags);
|
||||
serial = kvmalloc(sizeof(struct serial_in_rdev) * serial_nums,
|
||||
GFP_KERNEL);
|
||||
if (!serial)
|
||||
return -ENOMEM;
|
||||
|
||||
return 1;
|
||||
for (i = 0; i < serial_nums; i++) {
|
||||
struct serial_in_rdev *serial_tmp = &serial[i];
|
||||
|
||||
spin_lock_init(&serial_tmp->serial_lock);
|
||||
serial_tmp->serial_rb = RB_ROOT_CACHED;
|
||||
init_waitqueue_head(&serial_tmp->serial_io_wait);
|
||||
}
|
||||
|
||||
rdev->serial = serial;
|
||||
set_bit(CollisionCheck, &rdev->flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int rdevs_init_serial(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int ret = 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
ret = rdev_init_serial(rdev);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Free all resources if the pool does not exist */
|
||||
if (ret && !mddev->serial_info_pool)
|
||||
rdevs_uninit_serial(mddev);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create wb_info_pool if rdev is the first multi-queue device flaged
|
||||
* with writemostly, also write-behind mode is enabled.
|
||||
* rdev needs to enable serialization if it meets these conditions:
|
||||
* 1. it is a multi-queue device flagged with writemostly.
|
||||
* 2. the write-behind mode is enabled.
|
||||
*/
|
||||
void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend)
|
||||
static int rdev_need_serial(struct md_rdev *rdev)
|
||||
{
|
||||
if (mddev->bitmap_info.max_write_behind == 0)
|
||||
return (rdev && rdev->mddev->bitmap_info.max_write_behind > 0 &&
|
||||
rdev->bdev->bd_queue->nr_hw_queues != 1 &&
|
||||
test_bit(WriteMostly, &rdev->flags));
|
||||
}
|
||||
|
||||
/*
|
||||
* Init resource for rdev(s), then create serial_info_pool if:
|
||||
* 1. rdev is the first device for which rdev_need_serial returns true.
|
||||
* 2. rdev is NULL, which means we want to enable serialization for all rdevs.
|
||||
*/
|
||||
void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (rdev && !rdev_need_serial(rdev) &&
|
||||
!test_bit(CollisionCheck, &rdev->flags))
|
||||
return;
|
||||
|
||||
if (!test_bit(WriteMostly, &rdev->flags) || !rdev_init_wb(rdev))
|
||||
return;
|
||||
if (!is_suspend)
|
||||
mddev_suspend(mddev);
|
||||
|
||||
if (mddev->wb_info_pool == NULL) {
|
||||
if (!rdev)
|
||||
ret = rdevs_init_serial(mddev);
|
||||
else
|
||||
ret = rdev_init_serial(rdev);
|
||||
if (ret)
|
||||
goto abort;
|
||||
|
||||
if (mddev->serial_info_pool == NULL) {
|
||||
unsigned int noio_flag;
|
||||
|
||||
noio_flag = memalloc_noio_save();
|
||||
mddev->serial_info_pool =
|
||||
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
|
||||
sizeof(struct serial_info));
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (!mddev->serial_info_pool) {
|
||||
rdevs_uninit_serial(mddev);
|
||||
pr_err("can't alloc memory pool for serialization\n");
|
||||
}
|
||||
}
|
||||
|
||||
abort:
|
||||
if (!is_suspend)
|
||||
mddev_resume(mddev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free resource from rdev(s), and destroy serial_info_pool under conditions:
|
||||
* 1. rdev is the last device flagged with CollisionCheck.
|
||||
* 2. when bitmap is destroyed while policy is not enabled.
|
||||
* 3. when the policy is disabled, the pool is destroyed only when no rdev needs it.
|
||||
*/
|
||||
void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend)
|
||||
{
|
||||
if (rdev && !test_bit(CollisionCheck, &rdev->flags))
|
||||
return;
|
||||
|
||||
if (mddev->serial_info_pool) {
|
||||
struct md_rdev *temp;
|
||||
int num = 0; /* used to track if other rdevs need the pool */
|
||||
|
||||
if (!is_suspend)
|
||||
mddev_suspend(mddev);
|
||||
noio_flag = memalloc_noio_save();
|
||||
mddev->wb_info_pool = mempool_create_kmalloc_pool(NR_WB_INFOS,
|
||||
sizeof(struct wb_info));
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (!mddev->wb_info_pool)
|
||||
pr_err("can't alloc memory pool for writemostly\n");
|
||||
rdev_for_each(temp, mddev) {
|
||||
if (!rdev) {
|
||||
if (!mddev->serialize_policy ||
|
||||
!rdev_need_serial(temp))
|
||||
rdev_uninit_serial(temp);
|
||||
else
|
||||
num++;
|
||||
} else if (temp != rdev &&
|
||||
test_bit(CollisionCheck, &temp->flags))
|
||||
num++;
|
||||
}
|
||||
|
||||
if (rdev)
|
||||
rdev_uninit_serial(rdev);
|
||||
|
||||
if (num)
|
||||
pr_info("The mempool could be used by other devices\n");
|
||||
else {
|
||||
mempool_destroy(mddev->serial_info_pool);
|
||||
mddev->serial_info_pool = NULL;
|
||||
}
|
||||
if (!is_suspend)
|
||||
mddev_resume(mddev);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mddev_create_wb_pool);
|
||||
|
||||
/*
|
||||
* destroy wb_info_pool if rdev is the last device flaged with WBCollisionCheck.
|
||||
*/
|
||||
static void mddev_destroy_wb_pool(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
if (!test_and_clear_bit(WBCollisionCheck, &rdev->flags))
|
||||
return;
|
||||
|
||||
if (mddev->wb_info_pool) {
|
||||
struct md_rdev *temp;
|
||||
int num = 0;
|
||||
|
||||
/*
|
||||
* Check if other rdevs need wb_info_pool.
|
||||
*/
|
||||
rdev_for_each(temp, mddev)
|
||||
if (temp != rdev &&
|
||||
test_bit(WBCollisionCheck, &temp->flags))
|
||||
num++;
|
||||
if (!num) {
|
||||
mddev_suspend(rdev->mddev);
|
||||
mempool_destroy(mddev->wb_info_pool);
|
||||
mddev->wb_info_pool = NULL;
|
||||
mddev_resume(rdev->mddev);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table_header *raid_table_header;
|
||||
|
||||
@ -2337,7 +2428,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
|
||||
pr_debug("md: bind<%s>\n", b);
|
||||
|
||||
if (mddev->raid_disks)
|
||||
mddev_create_wb_pool(mddev, rdev, false);
|
||||
mddev_create_serial_pool(mddev, rdev, false);
|
||||
|
||||
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
|
||||
goto fail;
|
||||
@ -2375,7 +2466,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
|
||||
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
|
||||
list_del_rcu(&rdev->same_set);
|
||||
pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
|
||||
mddev_destroy_wb_pool(rdev->mddev, rdev);
|
||||
mddev_destroy_serial_pool(rdev->mddev, rdev, false);
|
||||
rdev->mddev = NULL;
|
||||
sysfs_remove_link(&rdev->kobj, "block");
|
||||
sysfs_put(rdev->sysfs_state);
|
||||
@ -2888,10 +2979,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
}
|
||||
} else if (cmd_match(buf, "writemostly")) {
|
||||
set_bit(WriteMostly, &rdev->flags);
|
||||
mddev_create_wb_pool(rdev->mddev, rdev, false);
|
||||
mddev_create_serial_pool(rdev->mddev, rdev, false);
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "-writemostly")) {
|
||||
mddev_destroy_wb_pool(rdev->mddev, rdev);
|
||||
mddev_destroy_serial_pool(rdev->mddev, rdev, false);
|
||||
clear_bit(WriteMostly, &rdev->flags);
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "blocked")) {
|
||||
@ -5277,6 +5368,57 @@ static struct md_sysfs_entry md_fail_last_dev =
|
||||
__ATTR(fail_last_dev, S_IRUGO | S_IWUSR, fail_last_dev_show,
|
||||
fail_last_dev_store);
|
||||
|
||||
static ssize_t serialize_policy_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
if (mddev->pers == NULL || (mddev->pers->level != 1))
|
||||
return sprintf(page, "n/a\n");
|
||||
else
|
||||
return sprintf(page, "%d\n", mddev->serialize_policy);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set serialize_policy to true to enforce that write IO is not reordered
|
||||
* for raid1.
|
||||
*/
|
||||
static ssize_t
|
||||
serialize_policy_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
{
|
||||
int err;
|
||||
bool value;
|
||||
|
||||
err = kstrtobool(buf, &value);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (value == mddev->serialize_policy)
|
||||
return len;
|
||||
|
||||
err = mddev_lock(mddev);
|
||||
if (err)
|
||||
return err;
|
||||
if (mddev->pers == NULL || (mddev->pers->level != 1)) {
|
||||
pr_err("md: serialize_policy is only effective for raid1\n");
|
||||
err = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
mddev_suspend(mddev);
|
||||
if (value)
|
||||
mddev_create_serial_pool(mddev, NULL, true);
|
||||
else
|
||||
mddev_destroy_serial_pool(mddev, NULL, true);
|
||||
mddev->serialize_policy = value;
|
||||
mddev_resume(mddev);
|
||||
unlock:
|
||||
mddev_unlock(mddev);
|
||||
return err ?: len;
|
||||
}
|
||||
|
||||
static struct md_sysfs_entry md_serialize_policy =
|
||||
__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
|
||||
serialize_policy_store);
|
||||
|
||||
|
||||
static struct attribute *md_default_attrs[] = {
|
||||
&md_level.attr,
|
||||
&md_layout.attr,
|
||||
@ -5294,6 +5436,7 @@ static struct attribute *md_default_attrs[] = {
|
||||
&max_corr_read_errors.attr,
|
||||
&md_consistency_policy.attr,
|
||||
&md_fail_last_dev.attr,
|
||||
&md_serialize_policy.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -5769,18 +5912,18 @@ int md_run(struct mddev *mddev)
|
||||
goto bitmap_abort;
|
||||
|
||||
if (mddev->bitmap_info.max_write_behind > 0) {
|
||||
bool creat_pool = false;
|
||||
bool create_pool = false;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (test_bit(WriteMostly, &rdev->flags) &&
|
||||
rdev_init_wb(rdev))
|
||||
creat_pool = true;
|
||||
rdev_init_serial(rdev))
|
||||
create_pool = true;
|
||||
}
|
||||
if (creat_pool && mddev->wb_info_pool == NULL) {
|
||||
mddev->wb_info_pool =
|
||||
mempool_create_kmalloc_pool(NR_WB_INFOS,
|
||||
sizeof(struct wb_info));
|
||||
if (!mddev->wb_info_pool) {
|
||||
if (create_pool && mddev->serial_info_pool == NULL) {
|
||||
mddev->serial_info_pool =
|
||||
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
|
||||
sizeof(struct serial_info));
|
||||
if (!mddev->serial_info_pool) {
|
||||
err = -ENOMEM;
|
||||
goto bitmap_abort;
|
||||
}
|
||||
@ -6025,8 +6168,9 @@ static void __md_stop_writes(struct mddev *mddev)
|
||||
mddev->in_sync = 1;
|
||||
md_update_sb(mddev, 1);
|
||||
}
|
||||
mempool_destroy(mddev->wb_info_pool);
|
||||
mddev->wb_info_pool = NULL;
|
||||
/* disable policy to guarantee rdevs free resources for serialization */
|
||||
mddev->serialize_policy = 0;
|
||||
mddev_destroy_serial_pool(mddev, NULL, true);
|
||||
}
|
||||
|
||||
void md_stop_writes(struct mddev *mddev)
|
||||
|
@ -32,6 +32,16 @@
|
||||
* be retried.
|
||||
*/
|
||||
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
|
||||
|
||||
/*
|
||||
* The struct embedded in rdev is used to serialize IO.
|
||||
*/
|
||||
struct serial_in_rdev {
|
||||
struct rb_root_cached serial_rb;
|
||||
spinlock_t serial_lock;
|
||||
wait_queue_head_t serial_io_wait;
|
||||
};
|
||||
|
||||
/*
|
||||
* MD's 'extended' device
|
||||
*/
|
||||
@ -110,12 +120,7 @@ struct md_rdev {
|
||||
* in superblock.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The members for check collision of write behind IOs.
|
||||
*/
|
||||
struct list_head wb_list;
|
||||
spinlock_t wb_list_lock;
|
||||
wait_queue_head_t wb_io_wait;
|
||||
struct serial_in_rdev *serial; /* used for raid1 io serialization */
|
||||
|
||||
struct work_struct del_work; /* used for delayed sysfs removal */
|
||||
|
||||
@ -201,9 +206,9 @@ enum flag_bits {
|
||||
* it didn't fail, so don't use FailFast
|
||||
* any more for metadata
|
||||
*/
|
||||
WBCollisionCheck, /*
|
||||
* multiqueue device should check if there
|
||||
* is collision between write behind bios.
|
||||
CollisionCheck, /*
|
||||
* check if there is collision between raid1
|
||||
* serial bios.
|
||||
*/
|
||||
};
|
||||
|
||||
@ -263,12 +268,13 @@ enum mddev_sb_flags {
|
||||
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
|
||||
};
|
||||
|
||||
#define NR_WB_INFOS 8
|
||||
/* record current range of write behind IOs */
|
||||
struct wb_info {
|
||||
sector_t lo;
|
||||
sector_t hi;
|
||||
struct list_head list;
|
||||
#define NR_SERIAL_INFOS 8
|
||||
/* record current range of serialize IOs */
|
||||
struct serial_info {
|
||||
struct rb_node node;
|
||||
sector_t start; /* start sector of rb node */
|
||||
sector_t last; /* end sector of rb node */
|
||||
sector_t _subtree_last; /* highest sector in subtree of rb node */
|
||||
};
|
||||
|
||||
struct mddev {
|
||||
@ -487,13 +493,14 @@ struct mddev {
|
||||
*/
|
||||
struct work_struct flush_work;
|
||||
struct work_struct event_work; /* used by dm to report failure event */
|
||||
mempool_t *wb_info_pool;
|
||||
mempool_t *serial_info_pool;
|
||||
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
|
||||
struct md_cluster_info *cluster_info;
|
||||
unsigned int good_device_nr; /* good device num within cluster raid */
|
||||
|
||||
bool has_superblocks:1;
|
||||
bool fail_last_dev:1;
|
||||
bool serialize_policy:1;
|
||||
};
|
||||
|
||||
enum recovery_flags {
|
||||
@ -737,8 +744,10 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
|
||||
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
|
||||
extern void md_update_sb(struct mddev *mddev, int force);
|
||||
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
|
||||
extern void mddev_create_wb_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend);
|
||||
extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend);
|
||||
extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
|
||||
bool is_suspend);
|
||||
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
|
||||
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);
|
||||
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/interval_tree_generic.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
|
||||
@ -50,55 +51,71 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
|
||||
|
||||
#include "raid1-10.c"
|
||||
|
||||
static int check_and_add_wb(struct md_rdev *rdev, sector_t lo, sector_t hi)
|
||||
#define START(node) ((node)->start)
|
||||
#define LAST(node) ((node)->last)
|
||||
INTERVAL_TREE_DEFINE(struct serial_info, node, sector_t, _subtree_last,
|
||||
START, LAST, static inline, raid1_rb);
|
||||
|
||||
static int check_and_add_serial(struct md_rdev *rdev, struct r1bio *r1_bio,
|
||||
struct serial_info *si, int idx)
|
||||
{
|
||||
struct wb_info *wi, *temp_wi;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = lo + r1_bio->sectors;
|
||||
struct serial_in_rdev *serial = &rdev->serial[idx];
|
||||
|
||||
wi = mempool_alloc(mddev->wb_info_pool, GFP_NOIO);
|
||||
|
||||
spin_lock_irqsave(&rdev->wb_list_lock, flags);
|
||||
list_for_each_entry(temp_wi, &rdev->wb_list, list) {
|
||||
/* collision happened */
|
||||
if (hi > temp_wi->lo && lo < temp_wi->hi) {
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
spin_lock_irqsave(&serial->serial_lock, flags);
|
||||
/* collision happened */
|
||||
if (raid1_rb_iter_first(&serial->serial_rb, lo, hi))
|
||||
ret = -EBUSY;
|
||||
else {
|
||||
si->start = lo;
|
||||
si->last = hi;
|
||||
raid1_rb_insert(si, &serial->serial_rb);
|
||||
}
|
||||
|
||||
if (!ret) {
|
||||
wi->lo = lo;
|
||||
wi->hi = hi;
|
||||
list_add(&wi->list, &rdev->wb_list);
|
||||
} else
|
||||
mempool_free(wi, mddev->wb_info_pool);
|
||||
spin_unlock_irqrestore(&rdev->wb_list_lock, flags);
|
||||
spin_unlock_irqrestore(&serial->serial_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void remove_wb(struct md_rdev *rdev, sector_t lo, sector_t hi)
|
||||
static void wait_for_serialization(struct md_rdev *rdev, struct r1bio *r1_bio)
|
||||
{
|
||||
struct wb_info *wi;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
struct serial_info *si;
|
||||
int idx = sector_to_idx(r1_bio->sector);
|
||||
struct serial_in_rdev *serial = &rdev->serial[idx];
|
||||
|
||||
if (WARN_ON(!mddev->serial_info_pool))
|
||||
return;
|
||||
si = mempool_alloc(mddev->serial_info_pool, GFP_NOIO);
|
||||
wait_event(serial->serial_io_wait,
|
||||
check_and_add_serial(rdev, r1_bio, si, idx) == 0);
|
||||
}
|
||||
|
||||
static void remove_serial(struct md_rdev *rdev, sector_t lo, sector_t hi)
|
||||
{
|
||||
struct serial_info *si;
|
||||
unsigned long flags;
|
||||
int found = 0;
|
||||
struct mddev *mddev = rdev->mddev;
|
||||
int idx = sector_to_idx(lo);
|
||||
struct serial_in_rdev *serial = &rdev->serial[idx];
|
||||
|
||||
spin_lock_irqsave(&rdev->wb_list_lock, flags);
|
||||
list_for_each_entry(wi, &rdev->wb_list, list)
|
||||
if (hi == wi->hi && lo == wi->lo) {
|
||||
list_del(&wi->list);
|
||||
mempool_free(wi, mddev->wb_info_pool);
|
||||
spin_lock_irqsave(&serial->serial_lock, flags);
|
||||
for (si = raid1_rb_iter_first(&serial->serial_rb, lo, hi);
|
||||
si; si = raid1_rb_iter_next(si, lo, hi)) {
|
||||
if (si->start == lo && si->last == hi) {
|
||||
raid1_rb_remove(si, &serial->serial_rb);
|
||||
mempool_free(si, mddev->serial_info_pool);
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
if (!found)
|
||||
WARN(1, "The write behind IO is not recorded\n");
|
||||
spin_unlock_irqrestore(&rdev->wb_list_lock, flags);
|
||||
wake_up(&rdev->wb_io_wait);
|
||||
WARN(1, "The write IO is not recorded for serialization\n");
|
||||
spin_unlock_irqrestore(&serial->serial_lock, flags);
|
||||
wake_up(&serial->serial_io_wait);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -430,6 +447,8 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
int mirror = find_bio_disk(r1_bio, bio);
|
||||
struct md_rdev *rdev = conf->mirrors[mirror].rdev;
|
||||
bool discard_error;
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = r1_bio->sector + r1_bio->sectors;
|
||||
|
||||
discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
|
||||
|
||||
@ -499,12 +518,8 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
}
|
||||
|
||||
if (behind) {
|
||||
if (test_bit(WBCollisionCheck, &rdev->flags)) {
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = r1_bio->sector + r1_bio->sectors;
|
||||
|
||||
remove_wb(rdev, lo, hi);
|
||||
}
|
||||
if (test_bit(CollisionCheck, &rdev->flags))
|
||||
remove_serial(rdev, lo, hi);
|
||||
if (test_bit(WriteMostly, &rdev->flags))
|
||||
atomic_dec(&r1_bio->behind_remaining);
|
||||
|
||||
@ -527,7 +542,8 @@ static void raid1_end_write_request(struct bio *bio)
|
||||
call_bio_endio(r1_bio);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (rdev->mddev->serialize_policy)
|
||||
remove_serial(rdev, lo, hi);
|
||||
if (r1_bio->bios[mirror] == NULL)
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
|
||||
@ -1479,6 +1495,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
|
||||
for (i = 0; i < disks; i++) {
|
||||
struct bio *mbio = NULL;
|
||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||
if (!r1_bio->bios[i])
|
||||
continue;
|
||||
|
||||
@ -1506,18 +1523,12 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
|
||||
mbio = bio_clone_fast(bio, GFP_NOIO, &mddev->bio_set);
|
||||
|
||||
if (r1_bio->behind_master_bio) {
|
||||
struct md_rdev *rdev = conf->mirrors[i].rdev;
|
||||
|
||||
if (test_bit(WBCollisionCheck, &rdev->flags)) {
|
||||
sector_t lo = r1_bio->sector;
|
||||
sector_t hi = r1_bio->sector + r1_bio->sectors;
|
||||
|
||||
wait_event(rdev->wb_io_wait,
|
||||
check_and_add_wb(rdev, lo, hi) == 0);
|
||||
}
|
||||
if (test_bit(CollisionCheck, &rdev->flags))
|
||||
wait_for_serialization(rdev, r1_bio);
|
||||
if (test_bit(WriteMostly, &rdev->flags))
|
||||
atomic_inc(&r1_bio->behind_remaining);
|
||||
}
|
||||
} else if (mddev->serialize_policy)
|
||||
wait_for_serialization(rdev, r1_bio);
|
||||
|
||||
r1_bio->bios[i] = mbio;
|
||||
|
||||
|
@ -6598,7 +6598,6 @@ raid5_show_group_thread_cnt(struct mddev *mddev, char *page)
|
||||
|
||||
static int alloc_thread_groups(struct r5conf *conf, int cnt,
|
||||
int *group_cnt,
|
||||
int *worker_cnt_per_group,
|
||||
struct r5worker_group **worker_groups);
|
||||
static ssize_t
|
||||
raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
|
||||
@ -6607,7 +6606,7 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
|
||||
unsigned int new;
|
||||
int err;
|
||||
struct r5worker_group *new_groups, *old_groups;
|
||||
int group_cnt, worker_cnt_per_group;
|
||||
int group_cnt;
|
||||
|
||||
if (len >= PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
@ -6630,13 +6629,11 @@ raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len)
|
||||
if (old_groups)
|
||||
flush_workqueue(raid5_wq);
|
||||
|
||||
err = alloc_thread_groups(conf, new,
|
||||
&group_cnt, &worker_cnt_per_group,
|
||||
&new_groups);
|
||||
err = alloc_thread_groups(conf, new, &group_cnt, &new_groups);
|
||||
if (!err) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
conf->group_cnt = group_cnt;
|
||||
conf->worker_cnt_per_group = worker_cnt_per_group;
|
||||
conf->worker_cnt_per_group = new;
|
||||
conf->worker_groups = new_groups;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
|
||||
@ -6672,16 +6669,13 @@ static struct attribute_group raid5_attrs_group = {
|
||||
.attrs = raid5_attrs,
|
||||
};
|
||||
|
||||
static int alloc_thread_groups(struct r5conf *conf, int cnt,
|
||||
int *group_cnt,
|
||||
int *worker_cnt_per_group,
|
||||
static int alloc_thread_groups(struct r5conf *conf, int cnt, int *group_cnt,
|
||||
struct r5worker_group **worker_groups)
|
||||
{
|
||||
int i, j, k;
|
||||
ssize_t size;
|
||||
struct r5worker *workers;
|
||||
|
||||
*worker_cnt_per_group = cnt;
|
||||
if (cnt == 0) {
|
||||
*group_cnt = 0;
|
||||
*worker_groups = NULL;
|
||||
@ -6882,7 +6876,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
struct disk_info *disk;
|
||||
char pers_name[6];
|
||||
int i;
|
||||
int group_cnt, worker_cnt_per_group;
|
||||
int group_cnt;
|
||||
struct r5worker_group *new_group;
|
||||
int ret;
|
||||
|
||||
@ -6928,10 +6922,9 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
for (i = 0; i < PENDING_IO_MAX; i++)
|
||||
list_add(&conf->pending_data[i].sibling, &conf->free_list);
|
||||
/* Don't enable multi-threading by default*/
|
||||
if (!alloc_thread_groups(conf, 0, &group_cnt, &worker_cnt_per_group,
|
||||
&new_group)) {
|
||||
if (!alloc_thread_groups(conf, 0, &group_cnt, &new_group)) {
|
||||
conf->group_cnt = group_cnt;
|
||||
conf->worker_cnt_per_group = worker_cnt_per_group;
|
||||
conf->worker_cnt_per_group = 0;
|
||||
conf->worker_groups = new_group;
|
||||
} else
|
||||
goto abort;
|
||||
|
@ -27,8 +27,8 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
|
||||
|
||||
#include <errno.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/types.h>
|
||||
@ -44,6 +44,9 @@ typedef uint64_t u64;
|
||||
#ifndef PAGE_SIZE
|
||||
# define PAGE_SIZE 4096
|
||||
#endif
|
||||
#ifndef PAGE_SHIFT
|
||||
# define PAGE_SHIFT 12
|
||||
#endif
|
||||
extern const char raid6_empty_zero_page[PAGE_SIZE];
|
||||
|
||||
#define __init
|
||||
@ -59,7 +62,9 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
|
||||
#define enable_kernel_altivec()
|
||||
#define disable_kernel_altivec()
|
||||
|
||||
#undef EXPORT_SYMBOL
|
||||
#define EXPORT_SYMBOL(sym)
|
||||
#undef EXPORT_SYMBOL_GPL
|
||||
#define EXPORT_SYMBOL_GPL(sym)
|
||||
#define MODULE_LICENSE(licence)
|
||||
#define MODULE_DESCRIPTION(desc)
|
||||
|
@ -124,6 +124,9 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
|
||||
#define time_before(x, y) ((x) < (y))
|
||||
#endif
|
||||
|
||||
#define RAID6_TEST_DISKS 8
|
||||
#define RAID6_TEST_DISKS_ORDER 3
|
||||
|
||||
static inline const struct raid6_recov_calls *raid6_choose_recov(void)
|
||||
{
|
||||
const struct raid6_recov_calls *const *algo;
|
||||
@ -146,7 +149,7 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
|
||||
}
|
||||
|
||||
static inline const struct raid6_calls *raid6_choose_gen(
|
||||
void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
|
||||
void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
|
||||
{
|
||||
unsigned long perf, bestgenperf, bestxorperf, j0, j1;
|
||||
int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */
|
||||
@ -181,7 +184,8 @@ static inline const struct raid6_calls *raid6_choose_gen(
|
||||
best = *algo;
|
||||
}
|
||||
pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
|
||||
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
|
||||
(perf * HZ * (disks-2)) >>
|
||||
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2));
|
||||
|
||||
if (!(*algo)->xor_syndrome)
|
||||
continue;
|
||||
@ -204,17 +208,24 @@ static inline const struct raid6_calls *raid6_choose_gen(
|
||||
bestxorperf = perf;
|
||||
|
||||
pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
|
||||
(perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
|
||||
(perf * HZ * (disks-2)) >>
|
||||
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
|
||||
}
|
||||
}
|
||||
|
||||
if (best) {
|
||||
pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
|
||||
best->name,
|
||||
(bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
|
||||
if (best->xor_syndrome)
|
||||
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
|
||||
(bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
|
||||
if (IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK)) {
|
||||
pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
|
||||
best->name,
|
||||
(bestgenperf * HZ * (disks-2)) >>
|
||||
(20 - PAGE_SHIFT+RAID6_TIME_JIFFIES_LG2));
|
||||
if (best->xor_syndrome)
|
||||
pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
|
||||
(bestxorperf * HZ * (disks-2)) >>
|
||||
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
|
||||
} else
|
||||
pr_info("raid6: skip pq benchmark and using algorithm %s\n",
|
||||
best->name);
|
||||
raid6_call = *best;
|
||||
} else
|
||||
pr_err("raid6: Yikes! No algorithm found!\n");
|
||||
@ -228,27 +239,33 @@ static inline const struct raid6_calls *raid6_choose_gen(
|
||||
|
||||
int __init raid6_select_algo(void)
|
||||
{
|
||||
const int disks = (65536/PAGE_SIZE)+2;
|
||||
const int disks = RAID6_TEST_DISKS;
|
||||
|
||||
const struct raid6_calls *gen_best;
|
||||
const struct raid6_recov_calls *rec_best;
|
||||
char *syndromes;
|
||||
void *dptrs[(65536/PAGE_SIZE)+2];
|
||||
int i;
|
||||
char *disk_ptr, *p;
|
||||
void *dptrs[RAID6_TEST_DISKS];
|
||||
int i, cycle;
|
||||
|
||||
for (i = 0; i < disks-2; i++)
|
||||
dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
|
||||
|
||||
/* Normal code - use a 2-page allocation to avoid D$ conflict */
|
||||
syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
|
||||
|
||||
if (!syndromes) {
|
||||
/* prepare the buffer and fill it circularly with gfmul table */
|
||||
disk_ptr = (char *)__get_free_pages(GFP_KERNEL, RAID6_TEST_DISKS_ORDER);
|
||||
if (!disk_ptr) {
|
||||
pr_err("raid6: Yikes! No memory available.\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dptrs[disks-2] = syndromes;
|
||||
dptrs[disks-1] = syndromes + PAGE_SIZE;
|
||||
p = disk_ptr;
|
||||
for (i = 0; i < disks; i++)
|
||||
dptrs[i] = p + PAGE_SIZE * i;
|
||||
|
||||
cycle = ((disks - 2) * PAGE_SIZE) / 65536;
|
||||
for (i = 0; i < cycle; i++) {
|
||||
memcpy(p, raid6_gfmul, 65536);
|
||||
p += 65536;
|
||||
}
|
||||
|
||||
if ((disks - 2) * PAGE_SIZE % 65536)
|
||||
memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
|
||||
|
||||
/* select raid gen_syndrome function */
|
||||
gen_best = raid6_choose_gen(&dptrs, disks);
|
||||
@ -256,7 +273,7 @@ int __init raid6_select_algo(void)
|
||||
/* select raid recover functions */
|
||||
rec_best = raid6_choose_recov();
|
||||
|
||||
free_pages((unsigned long)syndromes, 1);
|
||||
free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER);
|
||||
|
||||
return gen_best && rec_best ? 0 : -EINVAL;
|
||||
}
|
||||
|
@ -56,8 +56,8 @@ int main(int argc, char *argv[])
|
||||
uint8_t v;
|
||||
uint8_t exptbl[256], invtbl[256];
|
||||
|
||||
printf("#include <linux/raid/pq.h>\n");
|
||||
printf("#include <linux/export.h>\n");
|
||||
printf("#include <linux/raid/pq.h>\n");
|
||||
|
||||
/* Compute multiplication table */
|
||||
printf("\nconst u8 __attribute__((aligned(256)))\n"
|
||||
|
Loading…
Reference in New Issue
Block a user