mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 12:42:02 +00:00
[PATCH] md: Final stages of raid5 expand code
This patch adds raid5_reshape and end_reshape, which start and finish the reshape process respectively. raid5_reshape is only enabled if CONFIG_MD_RAID5_RESHAPE is set, to discourage accidental use. Read the 'help' for the CONFIG_MD_RAID5_RESHAPE entry, and make sure that you have backups, just in case. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
ccfcc3c10b
commit
292695531a
@ -127,6 +127,32 @@ config MD_RAID5
|
||||
|
||||
If unsure, say Y.
|
||||
|
||||
config MD_RAID5_RESHAPE
|
||||
bool "Support adding drives to a raid-5 array (experimental)"
|
||||
depends on MD_RAID5 && EXPERIMENTAL
|
||||
---help---
|
||||
A RAID-5 set can be expanded by adding extra drives. This
|
||||
requires "restriping" the array which means (almost) every
|
||||
block must be written to a different place.
|
||||
|
||||
This option allows such restriping to be done while the array
|
||||
is online. However it is still EXPERIMENTAL code. It should
|
||||
work, but please be sure that you have backups.
|
||||
|
||||
You will need a version of mdadm newer than 2.3.1. During the
|
||||
early stage of reshape there is a critical section where live data
|
||||
is being over-written. A crash during this time needs extra care
|
||||
for recovery. The newer mdadm takes a copy of the data in the
|
||||
critical section and will restore it, if necessary, after a crash.
|
||||
|
||||
The mdadm usage is e.g.
|
||||
mdadm --grow /dev/md1 --raid-disks=6
|
||||
to grow '/dev/md1' so that it has 6 disks.
|
||||
|
||||
Note: The array can only be expanded, not contracted.
|
||||
There should be enough spares already present to make the new
|
||||
array workable.
|
||||
|
||||
config MD_RAID6
|
||||
tristate "RAID-6 mode"
|
||||
depends on BLK_DEV_MD
|
||||
|
@ -158,11 +158,12 @@ static int start_readonly;
|
||||
*/
|
||||
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
|
||||
static atomic_t md_event_count;
|
||||
/*
 * md_new_event - note that an md event has occurred.
 * Increments md_event_count and wakes waiters on md_event_waiters.
 * NOTE(review): this span shows both the old (static) and the new
 * (non-static) signature line of the patch; the body is shared.
 */
static void md_new_event(mddev_t *mddev)
|
||||
void md_new_event(mddev_t *mddev)
|
||||
{
|
||||
atomic_inc(&md_event_count);
|
||||
wake_up(&md_event_waiters);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_new_event);
|
||||
|
||||
/*
|
||||
* Allows iteration over all existing md arrays
|
||||
@ -4467,7 +4468,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
||||
|
||||
#define SYNC_MARKS 10
|
||||
#define SYNC_MARK_STEP (3*HZ)
|
||||
static void md_do_sync(mddev_t *mddev)
|
||||
void md_do_sync(mddev_t *mddev)
|
||||
{
|
||||
mddev_t *mddev2;
|
||||
unsigned int currspeed = 0,
|
||||
@ -4704,6 +4705,7 @@ static void md_do_sync(mddev_t *mddev)
|
||||
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_do_sync);
|
||||
|
||||
|
||||
/*
|
||||
|
@ -331,6 +331,8 @@ static int grow_stripes(raid5_conf_t *conf, int num)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MD_RAID5_RESHAPE
|
||||
static int resize_stripes(raid5_conf_t *conf, int newsize)
|
||||
{
|
||||
/* Make all the stripes able to hold 'newsize' devices.
|
||||
@ -451,7 +453,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
|
||||
conf->pool_size = newsize;
|
||||
return err;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int drop_one_stripe(raid5_conf_t *conf)
|
||||
{
|
||||
@ -1034,6 +1036,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void end_reshape(raid5_conf_t *conf);
|
||||
|
||||
static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
|
||||
{
|
||||
int sectors_per_chunk = conf->chunk_size >> 9;
|
||||
@ -1844,6 +1848,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
|
||||
if (sector_nr >= max_sector) {
|
||||
/* just being told to finish up .. nothing much to do */
|
||||
unplug_slaves(mddev);
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
|
||||
end_reshape(conf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mddev->curr_resync < max_sector) /* aborted */
|
||||
bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
|
||||
@ -2464,6 +2472,116 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MD_RAID5_RESHAPE
|
||||
/*
 * raid5_reshape - begin growing a raid5 array to @raid_disks devices.
 * @mddev:      the array to reshape
 * @raid_disks: requested number of raid devices after the reshape
 *
 * Validates the request, resizes the stripe cache for the new device
 * count, switches the configuration over to the new geometry, pulls in
 * as many non-faulty spare devices as raid5_add_disk() accepts and then
 * starts a "%s_reshape" thread running md_do_sync().
 *
 * Returns 0 on success or when the array already has @raid_disks devices,
 * -EBUSY if the array is degraded or a recovery is already running,
 * -EINVAL when asked to shrink or when there are too few spares even for
 * a degraded array of the new size, -ENOSPC when there are too few
 * stripe_heads, the error from resize_stripes() if that fails, and
 * -EAGAIN if the reshape thread could not be registered.
 */
static int raid5_reshape(mddev_t *mddev, int raid_disks)
{
	raid5_conf_t *conf = mddev_to_conf(mddev);
	int err;
	mdk_rdev_t *rdev;
	struct list_head *rtmp;
	int spares = 0;
	int added_devices = 0;

	if (mddev->degraded ||
	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		return -EBUSY;
	if (conf->raid_disks > raid_disks)
		return -EINVAL; /* Cannot shrink array yet */
	if (conf->raid_disks == raid_disks)
		return 0; /* nothing to do */

	/* Can only proceed if there are plenty of stripe_heads.
	 * We need a minimum of one full stripe, and for sensible progress
	 * it is best to have about 4 times that.
	 * If we require 4 times, then the default 256 4K stripe_heads will
	 * allow for chunk sizes up to 256K, which is probably OK.
	 * If the chunk size is greater, user-space should request more
	 * stripe_heads first.
	 */
	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
		printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
		       (mddev->chunk_size / STRIPE_SIZE)*4);
		return -ENOSPC;
	}

	/* Count the devices that are neither in the array nor faulty. */
	ITERATE_RDEV(mddev, rdev, rtmp)
		if (rdev->raid_disk < 0 &&
		    !test_bit(Faulty, &rdev->flags))
			spares++;
	if (conf->raid_disks + spares < raid_disks-1)
		/* Not enough devices even to make a degraded array
		 * of that size
		 */
		return -EINVAL;

	err = resize_stripes(conf, raid_disks);
	if (err)
		return err;

	/* Switch to the new geometry; expand_progress restarts at 0. */
	spin_lock_irq(&conf->device_lock);
	conf->previous_raid_disks = conf->raid_disks;
	mddev->raid_disks = conf->raid_disks = raid_disks;
	conf->expand_progress = 0;
	spin_unlock_irq(&conf->device_lock);

	/* Add some new drives, as many as will fit.
	 * We know there are enough to make the newly sized array work.
	 */
	ITERATE_RDEV(mddev, rdev, rtmp)
		if (rdev->raid_disk < 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			/* A non-zero return is treated as "device added". */
			if (raid5_add_disk(mddev, rdev)) {
				char nm[20];
				set_bit(In_sync, &rdev->flags);
				conf->working_disks++;
				added_devices++;
				sprintf(nm, "rd%d", rdev->raid_disk);
				/* The link is only informational, so a failure
				 * is reported but does not abort the reshape.
				 */
				if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
					printk(KERN_WARNING
					       "raid5: failed to create link %s\n",
					       nm);
			} else
				break;
		}

	/* Any new slot that did not receive a device leaves us degraded. */
	mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices;
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
						"%s_reshape");
	if (!mddev->sync_thread) {
		/* Could not start the thread: restore the old geometry.
		 * NOTE(review): devices added above and the mddev->degraded
		 * value set above are not undone on this path - confirm
		 * whether that is intended.
		 */
		mddev->recovery = 0;
		spin_lock_irq(&conf->device_lock);
		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
		conf->expand_progress = MaxSector;
		spin_unlock_irq(&conf->device_lock);
		return -EAGAIN;
	}
	md_wakeup_thread(mddev->sync_thread);
	md_new_event(mddev);
	return 0;
}
|
||||
#endif
|
||||
|
||||
/*
 * end_reshape - publish the array's new size once a reshape has finished.
 * @conf: raid5 configuration of the array that was reshaped
 *
 * Recomputes mddev->array_size from the per-device size and the current
 * number of raid disks (less one), updates the gendisk capacity and the
 * block-device inode size to match, and finally sets
 * conf->expand_progress to MaxSector under conf->device_lock.
 */
static void end_reshape(raid5_conf_t *conf)
{
	mddev_t *mddev = conf->mddev;
	struct block_device *bdev;

	mddev->array_size = mddev->size * (mddev->raid_disks-1);
	set_capacity(mddev->gendisk, mddev->array_size << 1);
	mddev->changed = 1;

	bdev = bdget_disk(mddev->gendisk, 0);
	if (bdev) {
		mutex_lock(&bdev->bd_inode->i_mutex);
		/* Widen to loff_t before shifting so the byte count cannot
		 * overflow if array_size's own type is narrower than loff_t
		 * (presumably sector_t, which can be 32 bits - TODO confirm).
		 */
		i_size_write(bdev->bd_inode, (loff_t)mddev->array_size << 10);
		mutex_unlock(&bdev->bd_inode->i_mutex);
		bdput(bdev);
	}
	spin_lock_irq(&conf->device_lock);
	conf->expand_progress = MaxSector;
	spin_unlock_irq(&conf->device_lock);
}
|
||||
|
||||
static void raid5_quiesce(mddev_t *mddev, int state)
|
||||
{
|
||||
raid5_conf_t *conf = mddev_to_conf(mddev);
|
||||
@ -2502,6 +2620,9 @@ static struct mdk_personality raid5_personality =
|
||||
.spare_active = raid5_spare_active,
|
||||
.sync_request = sync_request,
|
||||
.resize = raid5_resize,
|
||||
#ifdef CONFIG_MD_RAID5_RESHAPE
|
||||
.reshape = raid5_reshape,
|
||||
#endif
|
||||
.quiesce = raid5_quiesce,
|
||||
};
|
||||
|
||||
|
@ -92,7 +92,8 @@ extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
|
||||
extern void md_super_wait(mddev_t *mddev);
|
||||
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
|
||||
struct page *page, int rw);
|
||||
|
||||
extern void md_do_sync(mddev_t *mddev);
|
||||
extern void md_new_event(mddev_t *mddev);
|
||||
|
||||
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user