From ab9bc81c1cf0efc7fc5a3aa4e562aa88d09ada57 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Nov 2024 05:45:34 -0700 Subject: [PATCH] Revert "block: pre-calculate max_zone_append_sectors" This causes issue on, at least, nvme-mpath where my boot fails with: WARNING: CPU: 354 PID: 2729 at block/blk-settings.c:75 blk_validate_limits+0x356/0x380 Modules linked in: tg3(+) nvme usbcore scsi_mod ptp i2c_piix4 libphy nvme_core crc32c_intel scsi_common usb_common pps_core i2c_smbus CPU: 354 UID: 0 PID: 2729 Comm: kworker/u2061:1 Not tainted 6.12.0-rc6+ #181 Hardware name: Dell Inc. PowerEdge R7625/06444F, BIOS 1.8.3 04/02/2024 Workqueue: async async_run_entry_fn RIP: 0010:blk_validate_limits+0x356/0x380 Code: f6 47 01 04 75 28 83 bf 94 00 00 00 00 75 39 83 bf 98 00 00 00 00 75 34 83 7f 68 00 75 32 31 c0 83 7f 5c 00 0f 84 9b fd ff ff <0f> 0b eb 13 0f 0b eb 0f 48 c7 c0 74 12 58 92 48 89 c7 e8 13 76 46 RSP: 0018:ffffa8a1dfb93b30 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff9232829c8388 RCX: 0000000000000088 RDX: 0000000000000080 RSI: 0000000000000200 RDI: ffffa8a1dfb93c38 RBP: 000000000000000c R08: 00000000ffffffff R09: 000000000000ffff R10: 0000000000000000 R11: 0000000000000000 R12: ffff9232829b9000 R13: ffff9232829b9010 R14: ffffa8a1dfb93c38 R15: ffffa8a1dfb93c38 FS: 0000000000000000(0000) GS:ffff923867c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c1b92480a8 CR3: 0000002484ff0002 CR4: 0000000000370ef0 Call Trace: ? __warn+0xca/0x1a0 ? blk_validate_limits+0x356/0x380 ? report_bug+0x11a/0x1a0 ? handle_bug+0x5e/0x90 ? exc_invalid_op+0x16/0x40 ? asm_exc_invalid_op+0x16/0x20 ? blk_validate_limits+0x356/0x380 blk_alloc_queue+0x7a/0x250 __blk_alloc_disk+0x39/0x80 nvme_mpath_alloc_disk+0x13d/0x1b0 [nvme_core] nvme_scan_ns+0xcc7/0x1010 [nvme_core] async_run_entry_fn+0x27/0x120 process_scheduled_works+0x1a0/0x360 worker_thread+0x2bc/0x350 ? pr_cont_work+0x1b0/0x1b0 kthread+0x111/0x120 ? kthread_unuse_mm+0x90/0x90 ret_from_fork+0x30/0x40 ? kthread_unuse_mm+0x90/0x90 ret_from_fork_asm+0x11/0x20 ---[ end trace 0000000000000000 ]--- presumably due to max_zone_append_sectors not being cleared to zero, resulting in blk_validate_zoned_limits() complaining and failing. This reverts commit 2a8f6153e1c2db06a537a5c9d61102eb591776f1. Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-merge.c | 3 ++- block/blk-settings.c | 25 +++++++++++++------------ block/blk-sysfs.c | 17 ++++++++++++++--- drivers/block/null_blk/zoned.c | 2 +- drivers/block/ublk_drv.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/md/dm-zone.c | 4 ++-- drivers/nvme/host/multipath.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd_zbc.c | 2 ++ include/linux/blkdev.h | 21 ++++++++++++++++++--- 12 files changed, 57 insertions(+), 27 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 5df4607321ca..09d10bb95fda 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -607,7 +607,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_IOERR; /* Make sure the BIO is small enough and will not get split */ - if (nr_sectors > q->limits.max_zone_append_sectors) + if (nr_sectors > queue_max_zone_append_sectors(q)) return BLK_STS_IOERR; bio->bi_opf |= REQ_NOMERGE; diff --git a/block/blk-merge.c b/block/blk-merge.c index 7c1375a080ad..d813d799cee7 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -388,10 +388,11 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim, struct bio *bio_split_zone_append(struct bio *bio, const struct queue_limits *lim, unsigned *nr_segs) { + unsigned int max_sectors = queue_limits_max_zone_append_sectors(lim); int split_sectors; split_sectors = bio_split_rw_at(bio, lim, nr_segs, - lim->max_zone_append_sectors << SECTOR_SHIFT); + max_sectors << SECTOR_SHIFT); if (WARN_ON_ONCE(split_sectors > 0)) split_sectors = -EINVAL; return bio_submit_split(bio, split_sectors); diff --git a/block/blk-settings.c b/block/blk-settings.c index 5cb69d85af0e..5ee3d6d1448d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -91,16 +91,17 @@ static int blk_validate_zoned_limits(struct queue_limits *lim) if (lim->zone_write_granularity < lim->logical_block_size) lim->zone_write_granularity = lim->logical_block_size; - /* - * The Zone Append size is limited by the maximum I/O size and the zone - * size given that it can't span zones. - * - * If no max_hw_zone_append_sectors limit is provided, the block layer - * will emulated it, else we're also bound by the hardware limit. - */ - lim->max_zone_append_sectors = - min_not_zero(lim->max_hw_zone_append_sectors, - min(lim->chunk_sectors, lim->max_hw_sectors)); + if (lim->max_zone_append_sectors) { + /* + * The Zone Append size is limited by the maximum I/O size + * and the zone size given that it can't span zones. + */ + lim->max_zone_append_sectors = + min3(lim->max_hw_sectors, + lim->max_zone_append_sectors, + lim->chunk_sectors); + } + return 0; } @@ -526,8 +527,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors); t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors, b->max_write_zeroes_sectors); - t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors, - b->max_hw_zone_append_sectors); + t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t), + queue_limits_max_zone_append_sectors(b)); t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d9f22122ae2f..741b95dfdbf6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -131,7 +131,6 @@ QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors) QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors) -QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors) #define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \ @@ -179,6 +178,18 @@ static ssize_t queue_max_discard_sectors_store(struct gendisk *disk, return ret; } +/* + * For zone append queue_max_zone_append_sectors does not just return the + * underlying queue limits, but actually contains a calculation. Because of + * that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here. + */ +static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page) +{ + return sprintf(page, "%llu\n", + (u64)queue_max_zone_append_sectors(disk->queue) << + SECTOR_SHIFT); +} + static ssize_t queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count) { @@ -468,7 +479,7 @@ QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes"); QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes"); QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes"); -QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes"); +QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes"); QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity"); QUEUE_RO_ENTRY(queue_zoned, "zoned"); @@ -596,7 +607,7 @@ static struct attribute *queue_attrs[] = { &queue_atomic_write_unit_max_entry.attr, &queue_write_same_max_entry.attr, &queue_max_write_zeroes_sectors_entry.attr, - &queue_max_zone_append_sectors_entry.attr, + &queue_zone_append_max_entry.attr, &queue_zone_write_granularity_entry.attr, &queue_rotational_entry.attr, &queue_zoned_entry.attr, diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 0d5f9bf95229..9bc768b2ca56 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -166,7 +166,7 @@ int null_init_zoned_dev(struct nullb_device *dev, lim->features |= BLK_FEAT_ZONED; lim->chunk_sectors = dev->zone_size_sects; - lim->max_hw_zone_append_sectors = dev->zone_append_max_sectors; + lim->max_zone_append_sectors = dev->zone_append_max_sectors; lim->max_open_zones = dev->zone_max_open; lim->max_active_zones = dev->zone_max_active; return 0; diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 8d938b2b41ee..59951e7c2593 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2270,7 +2270,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) lim.features |= BLK_FEAT_ZONED; lim.max_active_zones = p->max_active_zones; lim.max_open_zones = p->max_open_zones; - lim.max_hw_zone_append_sectors = p->max_zone_append_sectors; + lim.max_zone_append_sectors = p->max_zone_append_sectors; } if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 0e99a4714928..194417abc105 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -784,7 +784,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk, wg, v); return -ENODEV; } - lim->max_hw_zone_append_sectors = v; + lim->max_zone_append_sectors = v; dev_dbg(&vdev->dev, "max append sectors = %u\n", v); return 0; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 20edd3fabbab..c0d41c36e06e 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -344,7 +344,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); } else { set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); - lim->max_hw_zone_append_sectors = 0; + lim->max_zone_append_sectors = 0; } /* @@ -379,7 +379,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, if (!zlim.mapped_nr_seq_zones) { lim->max_open_zones = 0; lim->max_active_zones = 0; - lim->max_hw_zone_append_sectors = 0; + lim->max_zone_append_sectors = 0; lim->zone_write_granularity = 0; lim->chunk_sectors = 0; lim->features &= ~BLK_FEAT_ZONED; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c26cb7d3a2e5..6a15873055b9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -636,7 +636,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED; else - lim.max_hw_zone_append_sectors = 0; + lim.max_zone_append_sectors = 0; head->disk = blk_alloc_disk(&lim, ctrl->numa_node); if (IS_ERR(head->disk)) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 382949e18c6a..9a06f9d98cd6 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -111,7 +111,7 @@ void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim, lim->features |= BLK_FEAT_ZONED; lim->max_open_zones = zi->max_open_zones; lim->max_active_zones = zi->max_active_zones; - lim->max_hw_zone_append_sectors = ns->ctrl->max_zone_append; + lim->max_zone_append_sectors = ns->ctrl->max_zone_append; lim->chunk_sectors = ns->head->zsze = nvme_lba_to_sect(ns->head, zi->zone_size); } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index de5c54c057ec..ee2b74238758 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -634,6 +634,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, lim->max_open_zones = sdkp->zones_max_open; lim->max_active_zones = 0; lim->chunk_sectors = logical_to_sectors(sdkp->device, zone_blocks); + /* Enable block layer zone append emulation */ + lim->max_zone_append_sectors = 0; return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6d1413bd69a5..7bfc877e159e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -375,7 +375,6 @@ struct queue_limits { unsigned int max_user_discard_sectors; unsigned int max_secure_erase_sectors; unsigned int max_write_zeroes_sectors; - unsigned int max_hw_zone_append_sectors; unsigned int max_zone_append_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -1205,9 +1204,25 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q) return q->limits.max_segment_size; } +static inline unsigned int +queue_limits_max_zone_append_sectors(const struct queue_limits *l) +{ + unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors); + + return min_not_zero(l->max_zone_append_sectors, max_sectors); +} + +static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q) +{ + if (!blk_queue_is_zoned(q)) + return 0; + + return queue_limits_max_zone_append_sectors(&q->limits); +} + static inline bool queue_emulates_zone_append(struct request_queue *q) { - return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors; + return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors; } static inline bool bdev_emulates_zone_append(struct block_device *bdev) @@ -1218,7 +1233,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev) static inline unsigned int bdev_max_zone_append_sectors(struct block_device *bdev) { - return bdev_limits(bdev)->max_zone_append_sectors; + return queue_max_zone_append_sectors(bdev_get_queue(bdev)); } static inline unsigned int bdev_max_segments(struct block_device *bdev)