Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD bugfixes from Shaohua Li: - fix raid5-ppl flush request handling hang from Artur - fix a potential deadlock in raid5/10 reshape from BingJing - fix a deadlock for dm-raid from Heinz - fix two md-cluster issues for raid10 from Lidong and Guoqing - fix a NULL dereference problem in device removal from Neil - fix a NULL dereference problem in raid1/raid10 under specific conditions from Yufen - other cleanups and fixes * 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: md/raid1: fix NULL pointer dereference md: fix a potential deadlock of raid5/raid10 reshape md-cluster: choose correct label when clustered layout is not supported md: raid5: avoid string overflow warning raid5-ppl: fix handling flush requests md raid10: fix NULL deference in handle_write_completed() md: only allow remove_and_add_spares when no sync_thread running. md: document lifetime of internal rdev pointer. md: fix md_write_start() deadlock w/o metadata devices MD: Free bioset when md_run fails raid10: change the size of resync window for clustered raid md-multipath: Use seq_putc() in multipath_status() md/raid1: Fix trailing semicolon md/raid5: simplify uninitialization of shrinker
This commit is contained in:
commit
7e30309968
@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
|
||||
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
|
||||
}
|
||||
rcu_read_unlock();
|
||||
seq_printf (seq, "]");
|
||||
seq_putc(seq, ']');
|
||||
}
|
||||
|
||||
static int multipath_congested(struct mddev *mddev, int bits)
|
||||
|
@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
|
||||
struct bio *bio;
|
||||
int ff = 0;
|
||||
|
||||
if (!page)
|
||||
return;
|
||||
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
return;
|
||||
|
||||
@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
|
||||
* the only valid external interface is through the md
|
||||
* device.
|
||||
*/
|
||||
mddev->has_superblocks = false;
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
|
||||
set_disk_ro(mddev->gendisk, 1);
|
||||
}
|
||||
|
||||
if (rdev->sb_page)
|
||||
mddev->has_superblocks = true;
|
||||
|
||||
/* perform some consistency tests on the device.
|
||||
* We don't want the data to overlap the metadata,
|
||||
* Internal Bitmap issues have been handled elsewhere.
|
||||
@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
|
||||
}
|
||||
if (mddev->sync_set == NULL) {
|
||||
mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
if (!mddev->sync_set)
|
||||
return -ENOMEM;
|
||||
if (!mddev->sync_set) {
|
||||
err = -ENOMEM;
|
||||
goto abort;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&pers_lock);
|
||||
@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
|
||||
else
|
||||
pr_warn("md: personality for level %s is not loaded!\n",
|
||||
mddev->clevel);
|
||||
return -EINVAL;
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
spin_unlock(&pers_lock);
|
||||
if (mddev->level != pers->level) {
|
||||
@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
|
||||
pers->start_reshape == NULL) {
|
||||
/* This personality cannot handle reshaping... */
|
||||
module_put(pers->owner);
|
||||
return -EINVAL;
|
||||
err = -EINVAL;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
if (pers->sync_request) {
|
||||
@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
|
||||
mddev->private = NULL;
|
||||
module_put(pers->owner);
|
||||
bitmap_destroy(mddev);
|
||||
return err;
|
||||
goto abort;
|
||||
}
|
||||
if (mddev->queue) {
|
||||
bool nonrot = true;
|
||||
@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
sysfs_notify(&mddev->kobj, NULL, "degraded");
|
||||
return 0;
|
||||
|
||||
abort:
|
||||
if (mddev->bio_set) {
|
||||
bioset_free(mddev->bio_set);
|
||||
mddev->bio_set = NULL;
|
||||
}
|
||||
if (mddev->sync_set) {
|
||||
bioset_free(mddev->sync_set);
|
||||
mddev->sync_set = NULL;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_run);
|
||||
|
||||
@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
|
||||
bool md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
{
|
||||
int did_change = 0;
|
||||
|
||||
if (bio_data_dir(bi) != WRITE)
|
||||
return true;
|
||||
|
||||
@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
rcu_read_unlock();
|
||||
if (did_change)
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
if (!mddev->has_superblocks)
|
||||
return true;
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
|
||||
mddev->suspended);
|
||||
@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
|
||||
set_mask_bits(&mddev->sb_flags, 0,
|
||||
BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));
|
||||
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
mddev->delta_disks > 0 &&
|
||||
mddev->pers->finish_reshape &&
|
||||
mddev->pers->size &&
|
||||
mddev->queue) {
|
||||
mddev_lock_nointr(mddev);
|
||||
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
|
||||
mddev_unlock(mddev);
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
}
|
||||
|
||||
spin_lock(&mddev->lock);
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
/* We completed so min/max setting can be forgotten if used. */
|
||||
@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
|
||||
int removed = 0;
|
||||
bool remove_some = false;
|
||||
|
||||
if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
/* Mustn't remove devices when resync thread is running */
|
||||
return 0;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if ((this == NULL || rdev == this) &&
|
||||
rdev->raid_disk >= 0 &&
|
||||
|
@ -468,6 +468,8 @@ struct mddev {
|
||||
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
|
||||
struct md_cluster_info *cluster_info;
|
||||
unsigned int good_device_nr; /* good device num within cluster raid */
|
||||
|
||||
bool has_superblocks:1;
|
||||
};
|
||||
|
||||
enum recovery_flags {
|
||||
|
@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
struct md_rdev *repl =
|
||||
conf->mirrors[conf->raid_disks + number].rdev;
|
||||
freeze_array(conf, 0);
|
||||
if (atomic_read(&repl->nr_pending)) {
|
||||
/* It means that some queued IO of retry_list
|
||||
* hold repl. Thus, we cannot set replacement
|
||||
* as NULL, avoiding rdev NULL pointer
|
||||
* dereference in sync_request_write and
|
||||
* handle_write_finished.
|
||||
*/
|
||||
err = -EBUSY;
|
||||
unfreeze_array(conf);
|
||||
goto abort;
|
||||
}
|
||||
clear_bit(Replacement, &repl->flags);
|
||||
p->rdev = repl;
|
||||
conf->mirrors[conf->raid_disks + number].rdev = NULL;
|
||||
|
@ -26,6 +26,18 @@
|
||||
#define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
|
||||
#define BARRIER_BUCKETS_NR (1<<BARRIER_BUCKETS_NR_BITS)
|
||||
|
||||
/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
|
||||
* There are three safe ways to access raid1_info.rdev.
|
||||
* 1/ when holding mddev->reconfig_mutex
|
||||
* 2/ when resync/recovery is known to be happening - i.e. in code that is
|
||||
* called as part of performing resync/recovery.
|
||||
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
|
||||
* and if it is non-NULL, increment rdev->nr_pending before dropping the
|
||||
* RCU lock.
|
||||
* When .rdev is set to NULL, the nr_pending count is checked again and if it has
|
||||
* been incremented, the pointer is put back in .rdev.
|
||||
*/
|
||||
|
||||
struct raid1_info {
|
||||
struct md_rdev *rdev;
|
||||
sector_t head_position;
|
||||
|
@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
|
||||
#define RESYNC_WINDOW (1024*1024)
|
||||
/* maximum number of concurrent requests, memory permitting */
|
||||
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
|
||||
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
|
||||
#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
|
||||
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)
|
||||
|
||||
/*
|
||||
@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
for (m = 0; m < conf->copies; m++) {
|
||||
int dev = r10_bio->devs[m].devnum;
|
||||
rdev = conf->mirrors[dev].rdev;
|
||||
if (r10_bio->devs[m].bio == NULL)
|
||||
if (r10_bio->devs[m].bio == NULL ||
|
||||
r10_bio->devs[m].bio->bi_end_io == NULL)
|
||||
continue;
|
||||
if (!r10_bio->devs[m].bio->bi_status) {
|
||||
rdev_clear_badblocks(
|
||||
@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
||||
md_error(conf->mddev, rdev);
|
||||
}
|
||||
rdev = conf->mirrors[dev].replacement;
|
||||
if (r10_bio->devs[m].repl_bio == NULL)
|
||||
if (r10_bio->devs[m].repl_bio == NULL ||
|
||||
r10_bio->devs[m].repl_bio->bi_end_io == NULL)
|
||||
continue;
|
||||
|
||||
if (!r10_bio->devs[m].repl_bio->bi_status) {
|
||||
@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
|
||||
if (fc > 1 || fo > 0) {
|
||||
pr_err("only near layout is supported by clustered"
|
||||
" raid10\n");
|
||||
goto out;
|
||||
goto out_free_conf;
|
||||
}
|
||||
}
|
||||
|
||||
@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
|
||||
return;
|
||||
|
||||
if (mddev->delta_disks > 0) {
|
||||
sector_t size = raid10_size(mddev, 0, 0);
|
||||
md_set_array_sectors(mddev, size);
|
||||
if (mddev->recovery_cp > mddev->resync_max_sectors) {
|
||||
mddev->recovery_cp = mddev->resync_max_sectors;
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
}
|
||||
mddev->resync_max_sectors = size;
|
||||
if (mddev->queue) {
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
}
|
||||
mddev->resync_max_sectors = mddev->array_sectors;
|
||||
} else {
|
||||
int d;
|
||||
rcu_read_lock();
|
||||
|
@ -2,6 +2,19 @@
|
||||
#ifndef _RAID10_H
|
||||
#define _RAID10_H
|
||||
|
||||
/* Note: raid10_info.rdev can be set to NULL asynchronously by
|
||||
* raid10_remove_disk.
|
||||
* There are three safe ways to access raid10_info.rdev.
|
||||
* 1/ when holding mddev->reconfig_mutex
|
||||
* 2/ when resync/recovery/reshape is known to be happening - i.e. in code
|
||||
* that is called as part of performing resync/recovery/reshape.
|
||||
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
|
||||
* and if it is non-NULL, increment rdev->nr_pending before dropping the
|
||||
* RCU lock.
|
||||
* When .rdev is set to NULL, the nr_pending count is checked again and if it has
|
||||
* been incremented, the pointer is put back in .rdev.
|
||||
*/
|
||||
|
||||
struct raid10_info {
|
||||
struct md_rdev *rdev, *replacement;
|
||||
sector_t head_position;
|
||||
|
@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
|
||||
extern void ppl_stripe_write_finished(struct stripe_head *sh);
|
||||
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
|
||||
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
|
||||
extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
|
||||
|
||||
static inline bool raid5_has_ppl(struct r5conf *conf)
|
||||
{
|
||||
@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
|
||||
if (conf->log)
|
||||
ret = r5l_handle_flush_request(conf->log, bio);
|
||||
else if (raid5_has_ppl(conf))
|
||||
ret = 0;
|
||||
ret = ppl_handle_flush_request(conf->log, bio);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
|
||||
}
|
||||
}
|
||||
|
||||
int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
|
||||
{
|
||||
if (bio->bi_iter.bi_size == 0) {
|
||||
bio_endio(bio);
|
||||
return 0;
|
||||
}
|
||||
bio->bi_opf &= ~REQ_PREFLUSH;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
void ppl_stripe_write_finished(struct stripe_head *sh)
|
||||
{
|
||||
struct ppl_io_unit *io;
|
||||
|
@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
|
||||
static int grow_stripes(struct r5conf *conf, int num)
|
||||
{
|
||||
struct kmem_cache *sc;
|
||||
size_t namelen = sizeof(conf->cache_name[0]);
|
||||
int devs = max(conf->raid_disks, conf->previous_raid_disks);
|
||||
|
||||
if (conf->mddev->gendisk)
|
||||
sprintf(conf->cache_name[0],
|
||||
snprintf(conf->cache_name[0], namelen,
|
||||
"raid%d-%s", conf->level, mdname(conf->mddev));
|
||||
else
|
||||
sprintf(conf->cache_name[0],
|
||||
snprintf(conf->cache_name[0], namelen,
|
||||
"raid%d-%p", conf->level, conf->mddev);
|
||||
sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
|
||||
snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);
|
||||
|
||||
conf->active_name = 0;
|
||||
sc = kmem_cache_create(conf->cache_name[conf->active_name],
|
||||
@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)
|
||||
|
||||
log_exit(conf);
|
||||
|
||||
if (conf->shrinker.nr_deferred)
|
||||
unregister_shrinker(&conf->shrinker);
|
||||
|
||||
unregister_shrinker(&conf->shrinker);
|
||||
free_thread_groups(conf);
|
||||
shrink_stripes(conf);
|
||||
raid5_free_percpu(conf);
|
||||
@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
|
||||
if (mddev->delta_disks > 0) {
|
||||
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
|
||||
if (mddev->queue) {
|
||||
set_capacity(mddev->gendisk, mddev->array_sectors);
|
||||
revalidate_disk(mddev->gendisk);
|
||||
}
|
||||
} else {
|
||||
if (mddev->delta_disks <= 0) {
|
||||
int d;
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
mddev->degraded = raid5_calc_degraded(conf);
|
||||
|
@ -450,6 +450,18 @@ enum {
|
||||
* HANDLE gets cleared if stripe_handle leaves nothing locked.
|
||||
*/
|
||||
|
||||
/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
|
||||
* There are three safe ways to access disk_info.rdev.
|
||||
* 1/ when holding mddev->reconfig_mutex
|
||||
* 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
|
||||
* is called as part of performing resync/recovery/reshape.
|
||||
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
|
||||
* and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
|
||||
* lock.
|
||||
* When .rdev is set to NULL, the nr_pending count is checked again and if
|
||||
* it has been incremented, the pointer is put back in .rdev.
|
||||
*/
|
||||
|
||||
struct disk_info {
|
||||
struct md_rdev *rdev, *replacement;
|
||||
struct page *extra_page; /* extra page to use in prexor */
|
||||
|
Loading…
Reference in New Issue
Block a user