diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b16d2ee5e9dd..b45d641f4f33 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -318,25 +318,34 @@ static void raid1_end_read_request(struct bio *bio, int error) rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } +static void close_write(r1bio_t *r1_bio) +{ + /* it really is the end of this request */ + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = r1_bio->behind_page_count; + while (i--) + safe_put_page(r1_bio->behind_bvecs[i].bv_page); + kfree(r1_bio->behind_bvecs); + r1_bio->behind_bvecs = NULL; + } + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_write_end(r1_bio->mddev); +} + static void r1_bio_write_done(r1bio_t *r1_bio) { - if (atomic_dec_and_test(&r1_bio->remaining)) - { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = r1_bio->behind_page_count; - while (i--) - safe_put_page(r1_bio->behind_bvecs[i].bv_page); - kfree(r1_bio->behind_bvecs); - r1_bio->behind_bvecs = NULL; - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); + if (!atomic_dec_and_test(&r1_bio->remaining)) + return; + + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + close_write(r1_bio); if (test_bit(R1BIO_MadeGood, &r1_bio->state)) reschedule_retry(r1_bio); else @@ -360,12 +369,10 @@ static void raid1_end_write_request(struct bio *bio, int error) /* * 'one mirror IO has finished' event handler: */ - r1_bio->bios[mirror] = NULL; - to_put = bio; if (!uptodate) { - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); + set_bit(WriteErrorSeen, + &conf->mirrors[mirror].rdev->flags); + set_bit(R1BIO_WriteError, &r1_bio->state); } else { /* * Set R1BIO_Uptodate in our master bio, so that we @@ -380,6 +387,8 @@ static void raid1_end_write_request(struct bio *bio, int error) sector_t first_bad; int bad_sectors; + r1_bio->bios[mirror] = NULL; + to_put = bio; set_bit(R1BIO_Uptodate, &r1_bio->state); /* Maybe we can clear some bad blocks. */ @@ -1724,6 +1733,101 @@ static void fix_read_error(conf_t *conf, int read_disk, } } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +static int submit_bio_wait(int rw, struct bio *bio) +{ + struct completion event; + rw |= REQ_SYNC; + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(rw, bio); + wait_for_completion(&event); + + return test_bit(BIO_UPTODATE, &bio->bi_flags); +} + +static int narrow_write_error(r1bio_t *r1_bio, int i) +{ + mddev_t *mddev = r1_bio->mddev; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev = conf->mirrors[i].rdev; + int vcnt, idx; + struct bio_vec *vec; + + /* bio has the data to be written to device 'i' where + * we just recently had a write error. + * We repeatedly clone the bio and trim down to one block, + * then try the write. Where the write fails we record + * a bad block. + * It is conceivable that the bio doesn't exactly align with + * blocks. We must handle this somehow. + * + * We currently own a reference on the rdev. + */ + + int block_sectors; + sector_t sector; + int sectors; + int sect_to_write = r1_bio->sectors; + int ok = 1; + + if (rdev->badblocks.shift < 0) + return 0; + + block_sectors = 1 << rdev->badblocks.shift; + sector = r1_bio->sector; + sectors = ((sector + block_sectors) + & ~(sector_t)(block_sectors - 1)) + - sector; + + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + vcnt = r1_bio->behind_page_count; + vec = r1_bio->behind_bvecs; + idx = 0; + while (vec[idx].bv_page == NULL) + idx++; + } else { + vcnt = r1_bio->master_bio->bi_vcnt; + vec = r1_bio->master_bio->bi_io_vec; + idx = r1_bio->master_bio->bi_idx; + } + while (sect_to_write) { + struct bio *wbio; + if (sectors > sect_to_write) + sectors = sect_to_write; + /* Write at 'sector' for 'sectors'*/ + + wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); + memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); + wbio->bi_sector = r1_bio->sector; + wbio->bi_rw = WRITE; + wbio->bi_vcnt = vcnt; + wbio->bi_size = r1_bio->sectors << 9; + wbio->bi_idx = idx; + + md_trim_bio(wbio, sector - r1_bio->sector, sectors); + wbio->bi_sector += rdev->data_offset; + wbio->bi_bdev = rdev->bdev; + if (submit_bio_wait(WRITE, wbio) == 0) + /* failure! */ + ok = rdev_set_badblocks(rdev, sector, + sectors, 0) + && ok; + + bio_put(wbio); + sect_to_write -= sectors; + sector += sectors; + sectors = block_sectors; + } + return ok; +} + static void raid1d(mddev_t *mddev) { r1bio_t *r1_bio; @@ -1775,7 +1879,8 @@ static void raid1d(mddev_t *mddev) md_done_sync(mddev, s, 1); } else sync_request_write(mddev, r1_bio); - } else if (test_bit(R1BIO_MadeGood, &r1_bio->state)) { + } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) { int m; for (m = 0; m < conf->raid_disks ; m++) if (r1_bio->bios[m] == IO_MADE_GOOD) { @@ -1785,7 +1890,24 @@ static void raid1d(mddev_t *mddev) r1_bio->sector, r1_bio->sectors); rdev_dec_pending(rdev, mddev); + } else if (r1_bio->bios[m] != NULL) { + /* This drive got a write error. We + * need to narrow down and record + * precise write errors. + */ + if (!narrow_write_error(r1_bio, m)) { + md_error(mddev, + conf->mirrors[m].rdev); + /* an I/O failed, we can't clear + * the bitmap */ + set_bit(R1BIO_Degraded, + &r1_bio->state); + } + rdev_dec_pending(conf->mirrors[m].rdev, + mddev); } + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + close_write(r1_bio); raid_end_bio_io(r1_bio); } else if (test_bit(R1BIO_ReadError, &r1_bio->state)) { int disk; diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index b7885474e96c..e0d676b48974 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -145,7 +145,8 @@ struct r1bio_s { /* If a write for this request means we can clear some * known-bad-block records, we set this flag */ -#define R1BIO_MadeGood 7 +#define R1BIO_MadeGood 7 +#define R1BIO_WriteError 8 extern int md_raid1_congested(mddev_t *mddev, int bits);