Merge tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.13/block

Pull MD changes from Song:

"1. Enhance handling of faulty and blocked devices, by Yu Kuai.
 2. raid5-ppl atomic improvement, by Uros Bizjak.
 3. md-bitmap fix, by Yuan Can."

* tag 'md-6.13-20241105' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/md-bitmap: Add missing destroy_work_on_stack()
  md/raid5: don't set Faulty rdev for blocked_rdev
  md/raid10: don't wait for Faulty rdev in wait_blocked_rdev()
  md/raid1: don't wait for Faulty rdev in wait_blocked_rdev()
  md/raid1: factor out helper to handle blocked rdev from raid1_write_request()
  md: don't record new badblocks for faulty rdev
  md: don't wait faulty rdev in md_wait_for_blocked_rdev()
  md: add a new helper rdev_blocked()
  md/raid5-ppl: Use atomic64_inc_return() in ppl_new_iounit()
Committed by Jens Axboe, 2024-11-06 07:55:19 -07:00, commit 0b66deb16c
7 changed files with 101 additions and 69 deletions
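
The common thread in this series is the new rdev_blocked() helper added to drivers/md/md.h (see the diff below); the raid1, raid10 and raid5 hunks all funnel their per-device checks through it instead of open-coding the Blocked/BlockedBadBlocks tests. As a rough orientation only, here is a small standalone C sketch of the decision order the helper encodes; the fake_rdev struct and model_rdev_blocked() names are made up for illustration and are not kernel API:

/*
 * Standalone model of the rdev_blocked() decision order; plain bools stand in
 * for the Blocked/Faulty/BlockedBadBlocks bits in rdev->flags.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_rdev {
        bool blocked;            /* error handler set Blocked; daemon clears it */
        bool faulty;             /* device has already failed */
        bool blocked_badblocks;  /* unacknowledged bad blocks */
};

static bool model_rdev_blocked(const struct fake_rdev *rdev)
{
        if (rdev->blocked)
                return true;     /* always wait for the superblock update */
        if (rdev->faulty)
                return false;    /* Faulty: nothing left to acknowledge, don't wait */
        if (rdev->blocked_badblocks)
                return true;     /* wait until bad blocks are acknowledged */
        return false;
}

int main(void)
{
        /* A Faulty rdev with pending bad blocks no longer blocks writers. */
        struct fake_rdev r = { .faulty = true, .blocked_badblocks = true };

        printf("blocked: %d\n", model_rdev_blocked(&r));  /* prints "blocked: 0" */
        return 0;
}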

drivers/md/md-bitmap.c

@@ -1285,6 +1285,7 @@ static void bitmap_unplug_async(struct bitmap *bitmap)

         queue_work(md_bitmap_wq, &unplug_work.work);
         wait_for_completion(&done);
+        destroy_work_on_stack(&unplug_work.work);
 }

 static void bitmap_unplug(struct mddev *mddev, bool sync)

drivers/md/md.c

@@ -9762,9 +9762,7 @@ EXPORT_SYMBOL(md_reap_sync_thread);
 void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
 {
         sysfs_notify_dirent_safe(rdev->sysfs_state);
-        wait_event_timeout(rdev->blocked_wait,
-                           !test_bit(Blocked, &rdev->flags) &&
-                           !test_bit(BlockedBadBlocks, &rdev->flags),
+        wait_event_timeout(rdev->blocked_wait, !rdev_blocked(rdev),
                            msecs_to_jiffies(5000));
         rdev_dec_pending(rdev, mddev);
 }
@@ -9793,6 +9791,17 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 {
         struct mddev *mddev = rdev->mddev;
         int rv;
+
+        /*
+         * Recording new badblocks for faulty rdev will force unnecessary
+         * super block updating. This is fragile for external management because
+         * userspace daemon may trying to remove this device and deadlock may
+         * occur. This will be probably solved in the mdadm, but it is safer to
+         * avoid it.
+         */
+        if (test_bit(Faulty, &rdev->flags))
+                return 1;
+
         if (is_new)
                 s += rdev->new_data_offset;
         else

drivers/md/md.h

@@ -1002,6 +1002,30 @@ static inline void mddev_trace_remap(struct mddev *mddev, struct bio *bio,
                 trace_block_bio_remap(bio, disk_devt(mddev->gendisk), sector);
 }

+static inline bool rdev_blocked(struct md_rdev *rdev)
+{
+        /*
+         * Blocked will be set by error handler and cleared by daemon after
+         * updating superblock, meanwhile write IO should be blocked to prevent
+         * reading old data after power failure.
+         */
+        if (test_bit(Blocked, &rdev->flags))
+                return true;
+
+        /*
+         * Faulty device should not be accessed anymore, there is no need to
+         * wait for bad block to be acknowledged.
+         */
+        if (test_bit(Faulty, &rdev->flags))
+                return false;
+
+        /* rdev is blocked by badblocks. */
+        if (test_bit(BlockedBadBlocks, &rdev->flags))
+                return true;
+
+        return false;
+}
+
 #define mddev_add_trace_msg(mddev, fmt, args...) \
 do { \
         if (!mddev_is_dm(mddev)) \

drivers/md/raid1.c

@@ -1412,6 +1412,40 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
         submit_bio_noacct(read_bio);
 }

+static bool wait_blocked_rdev(struct mddev *mddev, struct bio *bio)
+{
+        struct r1conf *conf = mddev->private;
+        int disks = conf->raid_disks * 2;
+        int i;
+
+retry:
+        for (i = 0; i < disks; i++) {
+                struct md_rdev *rdev = conf->mirrors[i].rdev;
+
+                if (!rdev)
+                        continue;
+
+                /* don't write here until the bad block is acknowledged */
+                if (test_bit(WriteErrorSeen, &rdev->flags) &&
+                    rdev_has_badblock(rdev, bio->bi_iter.bi_sector,
+                                      bio_sectors(bio)) < 0)
+                        set_bit(BlockedBadBlocks, &rdev->flags);
+
+                if (rdev_blocked(rdev)) {
+                        if (bio->bi_opf & REQ_NOWAIT)
+                                return false;
+
+                        mddev_add_trace_msg(rdev->mddev, "raid1 wait rdev %d blocked",
+                                            rdev->raid_disk);
+                        atomic_inc(&rdev->nr_pending);
+                        md_wait_for_blocked_rdev(rdev, rdev->mddev);
+                        goto retry;
+                }
+        }
+
+        return true;
+}
+
 static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                                 int max_write_sectors)
 {
@@ -1419,7 +1453,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
         struct r1bio *r1_bio;
         int i, disks;
         unsigned long flags;
-        struct md_rdev *blocked_rdev;
         int first_clone;
         int max_sectors;
         bool write_behind = false;
@@ -1457,7 +1490,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                 return;
         }

  retry_write:
+        if (!wait_blocked_rdev(mddev, bio)) {
+                bio_wouldblock_error(bio);
+                return;
+        }
         r1_bio = alloc_r1bio(mddev, bio);
         r1_bio->sectors = max_write_sectors;
@@ -1473,7 +1510,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
          */
         disks = conf->raid_disks * 2;
-        blocked_rdev = NULL;
         max_sectors = r1_bio->sectors;
         for (i = 0; i < disks; i++) {
                 struct md_rdev *rdev = conf->mirrors[i].rdev;
@@ -1486,11 +1522,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                 if (!is_discard && rdev && test_bit(WriteMostly, &rdev->flags))
                         write_behind = true;

-                if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                        atomic_inc(&rdev->nr_pending);
-                        blocked_rdev = rdev;
-                        break;
-                }
                 r1_bio->bios[i] = NULL;
                 if (!rdev || test_bit(Faulty, &rdev->flags)) {
                         if (i < conf->raid_disks)
@@ -1506,13 +1537,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,

                         is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
                                              &first_bad, &bad_sectors);
-                        if (is_bad < 0) {
-                                /* mustn't write here until the bad block is
-                                 * acknowledged*/
-                                set_bit(BlockedBadBlocks, &rdev->flags);
-                                blocked_rdev = rdev;
-                                break;
-                        }
                         if (is_bad && first_bad <= r1_bio->sector) {
                                 /* Cannot write here at all */
                                 bad_sectors -= (r1_bio->sector - first_bad);
@@ -1543,27 +1567,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                 r1_bio->bios[i] = bio;
         }

-        if (unlikely(blocked_rdev)) {
-                /* Wait for this device to become unblocked */
-                int j;
-
-                for (j = 0; j < i; j++)
-                        if (r1_bio->bios[j])
-                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-                mempool_free(r1_bio, &conf->r1bio_pool);
-                allow_barrier(conf, bio->bi_iter.bi_sector);
-
-                if (bio->bi_opf & REQ_NOWAIT) {
-                        bio_wouldblock_error(bio);
-                        return;
-                }
-                mddev_add_trace_msg(mddev, "raid1 wait rdev %d blocked",
-                                    blocked_rdev->raid_disk);
-                md_wait_for_blocked_rdev(blocked_rdev, mddev);
-                wait_barrier(conf, bio->bi_iter.bi_sector, false);
-                goto retry_write;
-        }
-
         /*
          * When using a bitmap, we may call alloc_behind_master_bio below.
          * alloc_behind_master_bio allocates a copy of the data payload a page

drivers/md/raid10.c

@@ -1285,9 +1285,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
 {
-        int i;
         struct r10conf *conf = mddev->private;
         struct md_rdev *blocked_rdev;
+        int i;

 retry_wait:
         blocked_rdev = NULL;
@@ -1295,40 +1295,36 @@ retry_wait:
                 struct md_rdev *rdev, *rrdev;

                 rdev = conf->mirrors[i].rdev;
-                rrdev = conf->mirrors[i].replacement;
-                if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-                        atomic_inc(&rdev->nr_pending);
-                        blocked_rdev = rdev;
-                        break;
-                }
-                if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-                        atomic_inc(&rrdev->nr_pending);
-                        blocked_rdev = rrdev;
-                        break;
-                }
-
-                if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
+                if (rdev) {
                         sector_t dev_sector = r10_bio->devs[i].addr;

                         /*
                          * Discard request doesn't care the write result
                          * so it doesn't need to wait blocked disk here.
                          */
-                        if (!r10_bio->sectors)
-                                continue;
-
-                        if (rdev_has_badblock(rdev, dev_sector,
-                                              r10_bio->sectors) < 0) {
+                        if (test_bit(WriteErrorSeen, &rdev->flags) &&
+                            r10_bio->sectors &&
+                            rdev_has_badblock(rdev, dev_sector,
+                                              r10_bio->sectors) < 0)
                                 /*
-                                 * Mustn't write here until the bad block
-                                 * is acknowledged
+                                 * Mustn't write here until the bad
+                                 * block is acknowledged
                                  */
-                                atomic_inc(&rdev->nr_pending);
                                 set_bit(BlockedBadBlocks, &rdev->flags);
+
+                        if (rdev_blocked(rdev)) {
                                 blocked_rdev = rdev;
+                                atomic_inc(&rdev->nr_pending);
                                 break;
                         }
                 }
+
+                rrdev = conf->mirrors[i].replacement;
+                if (rrdev && rdev_blocked(rrdev)) {
+                        atomic_inc(&rrdev->nr_pending);
+                        blocked_rdev = rrdev;
+                        break;
+                }
         }

         if (unlikely(blocked_rdev)) {

drivers/md/raid5-ppl.c

@@ -258,7 +258,7 @@ static struct ppl_io_unit *ppl_new_iounit(struct ppl_log *log,
         memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
         pplhdr->signature = cpu_to_le32(ppl_conf->signature);

-        io->seq = atomic64_add_return(1, &ppl_conf->seq);
+        io->seq = atomic64_inc_return(&ppl_conf->seq);
         pplhdr->generation = cpu_to_le64(io->seq);

         return io;

drivers/md/raid5.c

@@ -4724,14 +4724,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
                 if (rdev) {
                         is_bad = rdev_has_badblock(rdev, sh->sector,
                                                    RAID5_STRIPE_SECTORS(conf));
-                        if (s->blocked_rdev == NULL
-                            && (test_bit(Blocked, &rdev->flags)
-                                || is_bad < 0)) {
+                        if (s->blocked_rdev == NULL) {
                                 if (is_bad < 0)
-                                        set_bit(BlockedBadBlocks,
-                                                &rdev->flags);
-                                s->blocked_rdev = rdev;
-                                atomic_inc(&rdev->nr_pending);
+                                        set_bit(BlockedBadBlocks, &rdev->flags);
+                                if (rdev_blocked(rdev)) {
+                                        s->blocked_rdev = rdev;
+                                        atomic_inc(&rdev->nr_pending);
+                                }
                         }
                 }
                 clear_bit(R5_Insync, &dev->flags);