md/raid1: handle merge_bvec_fn in member devices.
Currently we don't honour merge_bvec_fn in member devices, so if there is one we force all requests to be single-page at most. This is not ideal.

So create a raid1 merge_bvec_fn that checks that function in the children as well.

This introduces a small problem. There is no locking around calls to ->merge_bvec_fn and the subsequent calls to ->make_request, so a device added between the two could end up receiving a request that violates its merge_bvec_fn.

Currently the best we can do is synchronize_sched(). This works provided no preemption happens. If there is preemption, we just have to hope that new devices are largely consistent with old devices.

Signed-off-by: NeilBrown <neilb@suse.de>
commit 6b740b8d79
parent 050b66152f
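For readers unfamiliar with the merge_bvec_fn contract: before adding a page to a bio, the block layer asks the queue how many bytes of that page it will accept at the proposed sector, and a stacking driver answers by putting the same question to each member that might service the request (with the sector shifted by that member's data offset) and returning the minimum. Below is a small user-space model of that delegation, purely illustrative: member_dev, stacked_max_bytes and the rest are made-up names, not kernel API; the real stacked callback is the raid1_mergeable_bvec() added in the diff that follows.

/* Illustrative user-space sketch, not kernel code. */
#include <stdio.h>

#define NMEMBERS 2

struct member_dev {
        unsigned long long data_offset;        /* member's start within the array, in sectors */
        /* per-member limit: max bytes accepted at this member-relative sector */
        int (*max_bytes)(unsigned long long sector, int requested);
};

/* Example member that refuses to cross a 64KiB boundary. */
static int boundary_64k(unsigned long long sector, int requested)
{
        int room = 65536 - (int)((sector << 9) & 65535);
        return requested < room ? requested : room;
}

/* Example member with no restriction of its own. */
static int no_limit(unsigned long long sector, int requested)
{
        (void)sector;
        return requested;
}

/* The stacked device returns the minimum over all members, after shifting
 * the sector by each member's data_offset -- the same shape as the
 * raid1_mergeable_bvec() added by this patch. */
static int stacked_max_bytes(struct member_dev *m, int n,
                             unsigned long long sector, int requested)
{
        int max = requested;

        for (int i = 0; i < n; i++) {
                int ok = m[i].max_bytes(sector + m[i].data_offset, requested);
                if (ok < max)
                        max = ok;
        }
        return max;
}

int main(void)
{
        struct member_dev members[NMEMBERS] = {
                { .data_offset = 0,   .max_bytes = boundary_64k },
                { .data_offset = 128, .max_bytes = no_limit },
        };

        /* How much of a 4096-byte page fits at sector 121 (512-byte sectors)?
         * The first member only allows 3584 bytes before its 64KiB boundary,
         * so that is what the stacked device reports. */
        printf("%d\n", stacked_max_bytes(members, NMEMBERS, 121, 4096));
        return 0;
}

Taking the minimum is what keeps a later bio legal on every member; the per-member sector shift matters because a boundary that looks distant on the array may be close on a member whose data starts at a non-zero offset.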
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
                 rdev = rcu_dereference(conf->mirrors[disk].rdev);
                 if (r1_bio->bios[disk] == IO_BLOCKED
                     || rdev == NULL
+                    || test_bit(Unmerged, &rdev->flags)
                     || test_bit(Faulty, &rdev->flags))
                         continue;
                 if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors)
         return best_disk;
 }
 
+static int raid1_mergeable_bvec(struct request_queue *q,
+                                struct bvec_merge_data *bvm,
+                                struct bio_vec *biovec)
+{
+        struct mddev *mddev = q->queuedata;
+        struct r1conf *conf = mddev->private;
+        sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+        int max = biovec->bv_len;
+
+        if (mddev->merge_check_needed) {
+                int disk;
+                rcu_read_lock();
+                for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+                        struct md_rdev *rdev = rcu_dereference(
+                                conf->mirrors[disk].rdev);
+                        if (rdev && !test_bit(Faulty, &rdev->flags)) {
+                                struct request_queue *q =
+                                        bdev_get_queue(rdev->bdev);
+                                if (q->merge_bvec_fn) {
+                                        bvm->bi_sector = sector +
+                                                rdev->data_offset;
+                                        bvm->bi_bdev = rdev->bdev;
+                                        max = min(max, q->merge_bvec_fn(
+                                                          q, bvm, biovec));
+                                }
+                        }
+                }
+                rcu_read_unlock();
+        }
+        return max;
+
+}
+
 int md_raid1_congested(struct mddev *mddev, int bits)
 {
         struct r1conf *conf = mddev->private;
@@ -1015,7 +1049,8 @@ read_again:
                         break;
                 }
                 r1_bio->bios[i] = NULL;
-                if (!rdev || test_bit(Faulty, &rdev->flags)) {
+                if (!rdev || test_bit(Faulty, &rdev->flags)
+                    || test_bit(Unmerged, &rdev->flags)) {
                         if (i < conf->raid_disks)
                                 set_bit(R1BIO_Degraded, &r1_bio->state);
                         continue;
@@ -1335,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         struct mirror_info *p;
         int first = 0;
         int last = conf->raid_disks - 1;
+        struct request_queue *q = bdev_get_queue(rdev->bdev);
 
         if (mddev->recovery_disabled == conf->recovery_disabled)
                 return -EBUSY;
@@ -1342,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
         if (rdev->raid_disk >= 0)
                 first = last = rdev->raid_disk;
 
+        if (q->merge_bvec_fn) {
+                set_bit(Unmerged, &rdev->flags);
+                mddev->merge_check_needed = 1;
+        }
+
         for (mirror = first; mirror <= last; mirror++) {
                 p = conf->mirrors+mirror;
                 if (!p->rdev) {
 
                         disk_stack_limits(mddev->gendisk, rdev->bdev,
                                           rdev->data_offset << 9);
-                        /* as we don't honour merge_bvec_fn, we must
-                         * never risk violating it, so limit
-                         * ->max_segments to one lying with a single
-                         * page, as a one page request is never in
-                         * violation.
-                         */
-                        if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-                                blk_queue_max_segments(mddev->queue, 1);
-                                blk_queue_segment_boundary(mddev->queue,
-                                                           PAGE_CACHE_SIZE - 1);
-                        }
 
                         p->head_position = 0;
                         rdev->raid_disk = mirror;
@@ -1383,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                         break;
                 }
         }
+        if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+                /* Some requests might not have seen this new
+                 * merge_bvec_fn.  We must wait for them to complete
+                 * before merging the device fully.
+                 * First we make sure any code which has tested
+                 * our function has submitted the request, then
+                 * we wait for all outstanding requests to complete.
+                 */
+                synchronize_sched();
+                raise_barrier(conf);
+                lower_barrier(conf);
+                clear_bit(Unmerged, &rdev->flags);
+        }
         md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
@@ -2627,15 +2670,6 @@ static int run(struct mddev *mddev)
                         continue;
                 disk_stack_limits(mddev->gendisk, rdev->bdev,
                                   rdev->data_offset << 9);
-                /* as we don't honour merge_bvec_fn, we must never risk
-                 * violating it, so limit ->max_segments to 1 lying within
-                 * a single page, as a one page request is never in violation.
-                 */
-                if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
-                        blk_queue_max_segments(mddev->queue, 1);
-                        blk_queue_segment_boundary(mddev->queue,
-                                                   PAGE_CACHE_SIZE - 1);
-                }
         }
 
         mddev->degraded = 0;
@@ -2669,6 +2703,7 @@ static int run(struct mddev *mddev)
         if (mddev->queue) {
                 mddev->queue->backing_dev_info.congested_fn = raid1_congested;
                 mddev->queue->backing_dev_info.congested_data = mddev;
+                blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
         }
         return md_integrity_register(mddev);
 }