Merge tag 'md-6.8-20240216' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into block-6.8

Pull MD fixes from Song:

"1. Fix issues reported for dm-raid [1], by Yu Kuai. Please note that
    this PR only contains the first half of the set [2]. We still need
    more fixes in dm and md code (the rest of the set, or alternative
    fixes).
 2. Fix active_io leak, by Yu Kuai. The fix was posted in the same set
    [2], but it actually fixes a separate issue [3].

 [1] https://lore.kernel.org/linux-raid/e5e8afe2-e9a8-49a2-5ab0-958d4065c55e@redhat.com/
 [2] https://lore.kernel.org/linux-raid/20240201092559.910982-1-yukuai1@huaweicloud.com/
 [3] https://lore.kernel.org/linux-raid/20240130172524.0000417b@linux.intel.com/ "

* tag 'md-6.8-20240216' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Don't suspend the array for interrupted reshape
  md: Don't register sync_thread for reshape directly
  md: Make sure md_do_sync() will set MD_RECOVERY_DONE
  md: Don't ignore read-only array in md_check_recovery()
  md: Don't ignore suspended array in md_check_recovery()
  md: Fix missing release of 'active_io' for flush
This commit is contained in:
Jens Axboe 2024-02-16 15:42:01 -07:00
commit 15afd3d332
3 changed files with 48 additions and 67 deletions

View File

@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws)
rcu_read_lock(); rcu_read_lock();
} }
rcu_read_unlock(); rcu_read_unlock();
if (atomic_dec_and_test(&mddev->flush_pending)) if (atomic_dec_and_test(&mddev->flush_pending)) {
/* The pair is percpu_ref_get() from md_flush_request() */
percpu_ref_put(&mddev->active_io);
queue_work(md_wq, &mddev->flush_work); queue_work(md_wq, &mddev->flush_work);
}
} }
static void md_submit_flush_data(struct work_struct *ws) static void md_submit_flush_data(struct work_struct *ws)
@ -8788,12 +8792,16 @@ void md_do_sync(struct md_thread *thread)
int ret; int ret;
/* just incase thread restarts... */ /* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) || if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return; return;
if (!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto skip;
if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
!md_is_rdwr(mddev)) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_INTR, &mddev->recovery);
return; goto skip;
} }
if (mddev_is_clustered(mddev)) { if (mddev_is_clustered(mddev)) {
@ -9368,13 +9376,19 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, sync_work); struct mddev *mddev = container_of(ws, struct mddev, sync_work);
int spares = 0; int spares = 0;
bool suspend = false; bool suspend = false;
char *name;
if (md_spares_need_change(mddev)) /*
* If reshape is still in progress, spares won't be added or removed
* from conf until reshape is done.
*/
if (mddev->reshape_position == MaxSector &&
md_spares_need_change(mddev)) {
suspend = true; suspend = true;
mddev_suspend(mddev, false);
}
suspend ? mddev_suspend_and_lock_nointr(mddev) : mddev_lock_nointr(mddev);
mddev_lock_nointr(mddev);
if (!md_is_rdwr(mddev)) { if (!md_is_rdwr(mddev)) {
/* /*
* On a read-only array we can: * On a read-only array we can:
@ -9400,8 +9414,10 @@ static void md_start_sync(struct work_struct *ws)
if (spares) if (spares)
md_bitmap_write_all(mddev->bitmap); md_bitmap_write_all(mddev->bitmap);
name = test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ?
"reshape" : "resync";
rcu_assign_pointer(mddev->sync_thread, rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "resync")); md_register_thread(md_do_sync, mddev, name));
if (!mddev->sync_thread) { if (!mddev->sync_thread) {
pr_warn("%s: could not start resync thread...\n", pr_warn("%s: could not start resync thread...\n",
mdname(mddev)); mdname(mddev));
@ -9445,6 +9461,20 @@ not_running:
sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify_dirent_safe(mddev->sysfs_action);
} }
/*
 * Reap the array's sync thread, but only once MD_RECOVERY_DONE is set.
 *
 * While MD_RECOVERY_DONE is still clear in mddev->recovery, this only
 * clears MD_RECOVERY_NEEDED and returns. Once it is set, the array is
 * handed to md_reap_sync_thread(), unless no sync_thread is registered,
 * in which case a one-time warning is emitted and nothing is reaped.
 */
static void unregister_sync_thread(struct mddev *mddev)
{
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
/* resync/recovery still happening */
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
return;
}
/* DONE is set but there is no thread to reap: warn once and bail out. */
if (WARN_ON_ONCE(!mddev->sync_thread))
return;
md_reap_sync_thread(mddev);
}
/* /*
* This routine is regularly called by all per-raid-array threads to * This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update. * deal with generic issues like resync and super-block update.
@ -9469,9 +9499,6 @@ not_running:
*/ */
void md_check_recovery(struct mddev *mddev) void md_check_recovery(struct mddev *mddev)
{ {
if (READ_ONCE(mddev->suspended))
return;
if (mddev->bitmap) if (mddev->bitmap)
md_bitmap_daemon_work(mddev); md_bitmap_daemon_work(mddev);
@ -9485,7 +9512,8 @@ void md_check_recovery(struct mddev *mddev)
} }
if (!md_is_rdwr(mddev) && if (!md_is_rdwr(mddev) &&
!test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery))
return; return;
if ( ! ( if ( ! (
(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) || (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
@ -9507,8 +9535,7 @@ void md_check_recovery(struct mddev *mddev)
struct md_rdev *rdev; struct md_rdev *rdev;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
/* sync_work already queued. */ unregister_sync_thread(mddev);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock; goto unlock;
} }
@ -9571,16 +9598,7 @@ void md_check_recovery(struct mddev *mddev)
* still set. * still set.
*/ */
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
if (!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { unregister_sync_thread(mddev);
/* resync/recovery still happening */
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}
if (WARN_ON_ONCE(!mddev->sync_thread))
goto unlock;
md_reap_sync_thread(mddev);
goto unlock; goto unlock;
} }

View File

@ -4175,11 +4175,7 @@ static int raid10_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread)
goto out_free_conf;
} }
return 0; return 0;
@ -4573,16 +4569,8 @@ out:
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread) {
ret = -EAGAIN;
goto abort;
}
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
md_wakeup_thread(mddev->sync_thread);
md_new_event(); md_new_event();
return 0; return 0;

View File

@ -7936,11 +7936,7 @@ static int raid5_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread)
goto abort;
} }
/* Ok, everything is just fine now */ /* Ok, everything is just fine now */
@ -8506,29 +8502,8 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
write_seqcount_begin(&conf->gen_lock);
mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
mddev->new_chunk_sectors =
conf->chunk_sectors = conf->prev_chunk_sectors;
mddev->new_layout = conf->algorithm = conf->prev_algo;
rdev_for_each(rdev, mddev)
rdev->new_data_offset = rdev->data_offset;
smp_wmb();
conf->generation --;
conf->reshape_progress = MaxSector;
mddev->reshape_position = MaxSector;
write_seqcount_end(&conf->gen_lock);
spin_unlock_irq(&conf->device_lock);
return -EAGAIN;
}
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
md_wakeup_thread(mddev->sync_thread);
md_new_event(); md_new_event();
return 0; return 0;
} }