From b82d9fa257cb3725c49d94d2aeafc4677c34448a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:20 -0700 Subject: [PATCH 001/178] block: fix infinite loop for invalid zone append Returning 0 early from __bio_iov_append_get_pages() for the max_append_sectors warning just creates an infinite loop since 0 means success, and the bio will never fill from the unadvancing iov_iter. We could turn the return into an error value, but it will already be turned into an error value later on, so just remove the warning. Clearly no one ever hit it anyway. Fixes: 0512a75b98f84 ("block: Introduce REQ_OP_ZONE_APPEND") Signed-off-by: Keith Busch Reviewed-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220610195830.3574005-2-kbusch@fb.com Signed-off-by: Jens Axboe --- block/bio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/bio.c b/block/bio.c index 51c99f2c5c90..d9ff51fc457e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1229,9 +1229,6 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) size_t offset; int ret = 0; - if (WARN_ON_ONCE(!max_append_sectors)) - return 0; - /* * Move page array up in the allocated memory for the bio vecs as far as * possible so that we can start filling biovecs from the beginning From c58c0074c54c2e2bb3bb0d5a4d8896bb660cc8bc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:21 -0700 Subject: [PATCH 002/178] block/bio: remove duplicate append pages code The getting pages setup for zone append and normal IO are identical. Use common code for each. Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-3-kbusch@fb.com Signed-off-by: Jens Axboe --- block/bio.c | 104 ++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 61 deletions(-) diff --git a/block/bio.c b/block/bio.c index d9ff51fc457e..ee5fe1bb015e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off) put_page(pages[i]); } +static int bio_iov_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + bool same_page = false; + + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { + if (WARN_ON_ONCE(bio_full(bio, len))) + return -EINVAL; + __bio_add_page(bio, page, len, offset); + return 0; + } + + if (same_page) + put_page(page); + return 0; +} + +static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + bool same_page = false; + + if (bio_add_hw_page(q, bio, page, len, offset, + queue_max_zone_append_sectors(q), &same_page) != len) + return -EINVAL; + if (same_page) + put_page(page); + return 0; +} + #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) /** @@ -1177,58 +1208,10 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - bool same_page = false; ssize_t size, left; unsigned len, i; size_t offset; - /* - * Move page array up in the allocated memory for the bio vecs as far as - * possible so that we can start filling biovecs from the beginning - * without overwriting the temporary page array. 
- */ - BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); - pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); - if (unlikely(size <= 0)) - return size ? size : -EFAULT; - - for (left = size, i = 0; left > 0; left -= len, i++) { - struct page *page = pages[i]; - - len = min_t(size_t, PAGE_SIZE - offset, left); - - if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { - if (same_page) - put_page(page); - } else { - if (WARN_ON_ONCE(bio_full(bio, len))) { - bio_put_pages(pages + i, left, offset); - return -EINVAL; - } - __bio_add_page(bio, page, len, offset); - } - offset = 0; - } - - iov_iter_advance(iter, size); - return 0; -} - -static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) -{ - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; - unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - unsigned int max_append_sectors = queue_max_zone_append_sectors(q); - struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; - struct page **pages = (struct page **)bv; - ssize_t size, left; - unsigned len, i; - size_t offset; - int ret = 0; - /* * Move page array up in the allocated memory for the bio vecs as far as * possible so that we can start filling biovecs from the beginning @@ -1243,22 +1226,24 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) for (left = size, i = 0; left > 0; left -= len, i++) { struct page *page = pages[i]; - bool same_page = false; + int ret; len = min_t(size_t, PAGE_SIZE - offset, left); - if (bio_add_hw_page(q, bio, page, len, offset, - max_append_sectors, &same_page) != len) { + if (bio_op(bio) == REQ_OP_ZONE_APPEND) + ret = bio_iov_add_zone_append_page(bio, page, len, + offset); + else + ret = bio_iov_add_page(bio, page, len, offset); + + if (ret) { bio_put_pages(pages + i, left, offset); - ret = -EINVAL; - break; + return ret; } - if (same_page) - put_page(page); offset = 0; } - iov_iter_advance(iter, size - left); - return ret; + iov_iter_advance(iter, size); + return 0; } /** @@ -1295,10 +1280,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) } do { - if (bio_op(bio) == REQ_OP_ZONE_APPEND) - ret = __bio_iov_append_get_pages(bio, iter); - else - ret = __bio_iov_iter_get_pages(bio, iter); + ret = __bio_iov_iter_get_pages(bio, iter); } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); /* don't account direct I/O as memory stall */ From 3850e13f2853a17c083eb00fad2c6da4fde0b41e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:22 -0700 Subject: [PATCH 003/178] block: export dma_alignment attribute User space may want to know how to align their buffers to avoid bouncing. Export the queue attribute. Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-4-kbusch@fb.com Signed-off-by: Jens Axboe --- Documentation/ABI/stable/sysfs-block | 9 +++++++++ block/blk-sysfs.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block index e8797cd09aff..cd14ecb3c9a5 100644 --- a/Documentation/ABI/stable/sysfs-block +++ b/Documentation/ABI/stable/sysfs-block @@ -260,6 +260,15 @@ Description: for discards, and don't read this file. 
+What:		/sys/block/<disk>/queue/dma_alignment
+Date:		May 2022
+Contact:	linux-block@vger.kernel.org
+Description:
+		Reports the alignment that user space addresses must have to be
+		used for raw block device access with O_DIRECT and other driver
+		specific passthrough mechanisms.
+
+
 What:		/sys/block/<disk>/queue/fua
 Date:		May 2018
 Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9b905e9443e4..ec716ea26b92 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -274,6 +274,11 @@ static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page
 	return queue_var_show(q->limits.virt_boundary_mask, page);
 }
 
+static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(queue_dma_alignment(q), page);
+}
+
 #define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
 static ssize_t								\
 queue_##name##_show(struct request_queue *q, char *page)		\
@@ -606,6 +611,7 @@ QUEUE_RO_ENTRY(queue_dax, "dax");
 QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
 QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
 QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
+QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
 QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
@@ -667,6 +673,7 @@ static struct attribute *queue_attrs[] = {
 	&blk_throtl_sample_time_entry.attr,
 #endif
 	&queue_virt_boundary_mask_entry.attr,
+	&queue_dma_alignment_entry.attr,
 	NULL,
 };

From 4a2dcc35911324d6fcde09b1760cf4f2962699ef Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Fri, 10 Jun 2022 12:58:23 -0700
Subject: [PATCH 004/178] block: introduce bdev_dma_alignment helper

Preparing for upcoming dma_alignment users that have a block_device,
but don't need the request_queue.

Signed-off-by: Keith Busch
Reviewed-by: Johannes Thumshirn
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220610195830.3574005-5-kbusch@fb.com
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2f7b43444c5f..2556fcdb645b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1365,6 +1365,11 @@ static inline int queue_dma_alignment(const struct request_queue *q)
 	return q ? q->dma_alignment : 511;
 }
 
+static inline unsigned int bdev_dma_alignment(struct block_device *bdev)
+{
+	return queue_dma_alignment(bdev_get_queue(bdev));
+}
+
 static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 				 unsigned int len)
 {

From 37fee2e42ebbc33e5b7b6944979792bd9aa09e34 Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Fri, 10 Jun 2022 12:58:24 -0700
Subject: [PATCH 005/178] block: add a helper function for dio alignment

This will make it easier to add more complex acceptable alignment
criteria in the future.
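As an illustration (example numbers, not from this patch): with a 512-byte
logical block size the helper masks (pos | iov_iter_alignment(iter)) with
0x1ff, so a direct IO at pos == 4096 with 512-byte aligned segments passes,
while pos == 4100 leaves bit 2 of the masked value set and the request is
rejected with -EINVAL.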
Signed-off-by: Keith Busch Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-6-kbusch@fb.com Signed-off-by: Jens Axboe --- block/fops.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/block/fops.c b/block/fops.c index d6b3276a6c68..9d32df6fc315 100644 --- a/block/fops.c +++ b/block/fops.c @@ -42,6 +42,13 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) return op; } +static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos, + struct iov_iter *iter) +{ + return ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)); +} + #define DIO_INLINE_BIO_VECS 4 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, @@ -54,8 +61,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct bio bio; ssize_t ret; - if ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)) + if (blkdev_dio_unaligned(bdev, pos, iter)) return -EINVAL; if (nr_pages <= DIO_INLINE_BIO_VECS) @@ -173,8 +179,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos = iocb->ki_pos; int ret = 0; - if ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)) + if (blkdev_dio_unaligned(bdev, pos, iter)) return -EINVAL; if (iocb->ki_flags & IOCB_ALLOC_CACHE) @@ -298,8 +303,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, loff_t pos = iocb->ki_pos; int ret = 0; - if ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)) + if (blkdev_dio_unaligned(bdev, pos, iter)) return -EINVAL; if (iocb->ki_flags & IOCB_ALLOC_CACHE) From 67927d22015060967122facc8cfeaad8012e8808 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:25 -0700 Subject: [PATCH 006/178] block/merge: count bytes instead of sectors Individual bv_len's may not be a sector size. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220610195830.3574005-7-kbusch@fb.com Signed-off-by: Jens Axboe --- block/blk-merge.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 7771dacc99cb..db2e03c8af7f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -201,11 +201,11 @@ static inline unsigned get_max_segment_size(const struct request_queue *q, * @nsegs: [in,out] Number of segments in the bio being built. Incremented * by the number of segments from @bv that may be appended to that * bio without exceeding @max_segs - * @sectors: [in,out] Number of sectors in the bio being built. Incremented - * by the number of sectors from @bv that may be appended to that - * bio without exceeding @max_sectors + * @bytes: [in,out] Number of bytes in the bio being built. 
Incremented
+ *		by the number of bytes from @bv that may be appended to that
+ *		bio without exceeding @max_bytes
  * @max_segs: [in] upper bound for *@nsegs
- * @max_sectors: [in] upper bound for *@sectors
+ * @max_bytes: [in] upper bound for *@bytes
  *
  * When splitting a bio, it can happen that a bvec is encountered that is too
  * big to fit in a single segment and hence that it has to be split in the
@@ -216,10 +216,10 @@ static inline unsigned get_max_segment_size(const struct request_queue *q,
  */
 static bool bvec_split_segs(const struct request_queue *q,
 			    const struct bio_vec *bv, unsigned *nsegs,
-			    unsigned *sectors, unsigned max_segs,
-			    unsigned max_sectors)
+			    unsigned *bytes, unsigned max_segs,
+			    unsigned max_bytes)
 {
-	unsigned max_len = (min(max_sectors, UINT_MAX >> 9) - *sectors) << 9;
+	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
 	unsigned len = min(bv->bv_len, max_len);
 	unsigned total_len = 0;
 	unsigned seg_size = 0;
@@ -237,7 +237,7 @@ static bool bvec_split_segs(const struct request_queue *q,
 		break;
 	}
 
-	*sectors += total_len >> 9;
+	*bytes += total_len;
 
 	/* tell the caller to split the bvec if it is too big to fit */
 	return len > 0 || bv->bv_len > max_len;
@@ -269,8 +269,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
-	unsigned nsegs = 0, sectors = 0;
-	const unsigned max_sectors = get_max_io_size(q, bio);
+	unsigned nsegs = 0, bytes = 0;
+	const unsigned max_bytes = get_max_io_size(q, bio) << 9;
 	const unsigned max_segs = queue_max_segments(q);
 
 	bio_for_each_bvec(bv, bio, iter) {
@@ -282,12 +282,12 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 			goto split;
 
 		if (nsegs < max_segs &&
-		    sectors + (bv.bv_len >> 9) <= max_sectors &&
+		    bytes + bv.bv_len <= max_bytes &&
 		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
 			nsegs++;
-			sectors += bv.bv_len >> 9;
-		} else if (bvec_split_segs(q, &bv, &nsegs, &sectors, max_segs,
-			   max_sectors)) {
+			bytes += bv.bv_len;
+		} else if (bvec_split_segs(q, &bv, &nsegs, &bytes, max_segs,
+			   max_bytes)) {
 			goto split;
 		}
 
@@ -300,13 +300,20 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 split:
 	*segs = nsegs;
 
+	/*
+	 * Individual bvecs might not be logical block aligned. Round down the
+	 * split size so that each bio is properly block size aligned, even if
+	 * we do not use the full hardware limits.
+	 */
+	bytes = ALIGN_DOWN(bytes, queue_logical_block_size(q));
+
 	/*
 	 * Bio splitting may cause subtle trouble such as hang when doing sync
 	 * iopoll in direct IO routine. Given performance gain of iopoll for
 	 * big IO can be trival, disable iopoll when split needed.
 	 */
 	bio_clear_polled(bio);
-	return bio_split(bio, sectors, GFP_NOIO, bs);
+	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
 }
 
 /**
@@ -375,7 +382,7 @@ EXPORT_SYMBOL(blk_queue_split);
 unsigned int blk_recalc_rq_segments(struct request *rq)
 {
 	unsigned int nr_phys_segs = 0;
-	unsigned int nr_sectors = 0;
+	unsigned int bytes = 0;
 	struct req_iterator iter;
 	struct bio_vec bv;
 
@@ -398,7 +405,7 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
 	}
 
 	rq_for_each_bvec(bv, rq, iter)
-		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &nr_sectors,
+		bvec_split_segs(rq->q, &bv, &nr_phys_segs, &bytes,
 				UINT_MAX, UINT_MAX);
 	return nr_phys_segs;
 }

From 9cfe3ddecdc556ab1d1693b29e8a26ba80953ccc Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Fri, 10 Jun 2022 12:58:26 -0700
Subject: [PATCH 007/178] block/bounce: count bytes instead of sectors

Individual bv_len's may not be a sector size.
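For instance (an illustrative case, anticipating the relaxed DMA alignment
later in this series): a user buffer that starts 4 bytes into a page produces
a first bvec of 4096 - 4 = 4092 bytes, which is not a multiple of 512, so
accumulating bv_len >> 9 per bvec would silently drop the remainder.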
Signed-off-by: Keith Busch
Reviewed-by: Damien Le Moal
Reviewed-by: Pankaj Raghav
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220610195830.3574005-8-kbusch@fb.com
Signed-off-by: Jens Axboe
---
 block/bounce.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/block/bounce.c b/block/bounce.c
index 8f7b6fe3b4db..c8f487af7be3 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -205,19 +205,26 @@ void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
 	int rw = bio_data_dir(*bio_orig);
 	struct bio_vec *to, from;
 	struct bvec_iter iter;
-	unsigned i = 0;
+	unsigned i = 0, bytes = 0;
 	bool bounce = false;
-	int sectors = 0;
+	int sectors;
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_VECS)
-			sectors += from.bv_len >> 9;
+			bytes += from.bv_len;
 		if (PageHighMem(from.bv_page))
 			bounce = true;
 	}
 	if (!bounce)
 		return;
 
+	/*
+	 * Individual bvecs might not be logical block aligned. Round down
+	 * the split size so that each bio is properly block size aligned,
+	 * even if we do not use the full hardware limits.
+	 */
+	sectors = ALIGN_DOWN(bytes, queue_logical_block_size(q)) >>
+			SECTOR_SHIFT;
 	if (sectors < bio_sectors(*bio_orig)) {
 		bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
 		bio_chain(bio, *bio_orig);

From cfa320f72882f0e944e2237287db84b0f7df877d Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Fri, 10 Jun 2022 12:58:27 -0700
Subject: [PATCH 008/178] iov: introduce iov_iter_aligned

The existing iov_iter_alignment() function returns the logical OR of
address and length. For cases where address and length need to be
considered separately, introduce a helper function with which a caller
can specify the length and address masks that indicate whether the iov
is unaligned.

Cc: Alexander Viro
Signed-off-by: Keith Busch
Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/20220610195830.3574005-9-kbusch@fb.com
Signed-off-by: Jens Axboe
---
 include/linux/uio.h |  2 +
 lib/iov_iter.c      | 92 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 739285fe5a2f..34ba4a731179 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -219,6 +219,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #endif
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
+bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
+			unsigned len_mask);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0b64695ab632..507e732ef7cf 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1268,6 +1268,98 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
 }
 EXPORT_SYMBOL(iov_iter_discard);
 
+static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
+		unsigned len_mask)
+{
+	size_t size = i->count;
+	size_t skip = i->iov_offset;
+	unsigned k;
+
+	for (k = 0; k < i->nr_segs; k++, skip = 0) {
+		size_t len = i->iov[k].iov_len - skip;
+
+		if (len > size)
+			len = size;
+		if (len & len_mask)
+			return false;
+		if ((unsigned long)(i->iov[k].iov_base + skip) & addr_mask)
+			return false;
+
+		size -= len;
+		if (!size)
+			break;
+	}
+	return true;
+}
+
+static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
+		unsigned len_mask)
+{
+	size_t size =
i->count; + unsigned skip = i->iov_offset; + unsigned k; + + for (k = 0; k < i->nr_segs; k++, skip = 0) { + size_t len = i->bvec[k].bv_len - skip; + + if (len > size) + len = size; + if (len & len_mask) + return false; + if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask) + return false; + + size -= len; + if (!size) + break; + } + return true; +} + +/** + * iov_iter_is_aligned() - Check if the addresses and lengths of each segments + * are aligned to the parameters. + * + * @i: &struct iov_iter to restore + * @addr_mask: bit mask to check against the iov element's addresses + * @len_mask: bit mask to check against the iov element's lengths + * + * Return: false if any addresses or lengths intersect with the provided masks + */ +bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, + unsigned len_mask) +{ + if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) + return iov_iter_aligned_iovec(i, addr_mask, len_mask); + + if (iov_iter_is_bvec(i)) + return iov_iter_aligned_bvec(i, addr_mask, len_mask); + + if (iov_iter_is_pipe(i)) { + unsigned int p_mask = i->pipe->ring_size - 1; + size_t size = i->count; + + if (size & len_mask) + return false; + if (size && allocated(&i->pipe->bufs[i->head & p_mask])) { + if (i->iov_offset & addr_mask) + return false; + } + + return true; + } + + if (iov_iter_is_xarray(i)) { + if (i->count & len_mask) + return false; + if ((i->xarray_start + i->iov_offset) & addr_mask) + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(iov_iter_is_aligned); + static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { unsigned long res = 0; From 5debd9691c3ac64c3acd6867c264ad38bbe48cdc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:28 -0700 Subject: [PATCH 009/178] block: introduce bdev_iter_is_aligned helper Provide a convenient function for this repeatable coding pattern. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-10-kbusch@fb.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2556fcdb645b..0b8bc1fe0b2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1370,6 +1370,13 @@ static inline unsigned int bdev_dma_alignment(struct block_device *bdev) return queue_dma_alignment(bdev_get_queue(bdev)); } +static inline bool bdev_iter_is_aligned(struct block_device *bdev, + struct iov_iter *iter) +{ + return iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), + bdev_logical_block_size(bdev) - 1); +} + static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { From b1a000d3b8ec582da64bb644be633e5a0beffcbf Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:29 -0700 Subject: [PATCH 010/178] block: relax direct io memory alignment Use the address alignment requirements from the block_device for direct io instead of requiring addresses be aligned to the block size. User space can discover the alignment requirements from the dma_alignment queue attribute. User space can specify any hardware compatible DMA offset for each segment, but every segment length is still required to be a multiple of the block size. 
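To illustrate the new contract, a minimal user space sketch (hypothetical
device names, simplified error handling; a real program should fall back to
the historic 511 mask if the sysfs file is absent):

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	unsigned long mask = 511;	/* historic default dma_alignment mask */
	size_t align, len = 4096;	/* length stays a block size multiple */
	void *buf;
	FILE *f;
	int fd;

	/* dma_alignment is exported as a mask; alignment needed is mask + 1 */
	f = fopen("/sys/block/nvme0n1/queue/dma_alignment", "r");
	if (f) {
		if (fscanf(f, "%lu", &mask) != 1)
			mask = 511;
		fclose(f);
	}
	align = mask + 1;
	if (align < sizeof(void *))	/* posix_memalign() lower bound */
		align = sizeof(void *);

	if (posix_memalign(&buf, align, len))
		return 1;
	fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);
	if (fd < 0)
		return 1;
	printf("read %zd bytes\n", pread(fd, buf, len, 0));
	close(fd);
	free(buf);
	return 0;
}

Only the address requirement is relaxed here; the length of each segment must
still be a logical block size multiple.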
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-11-kbusch@fb.com Signed-off-by: Jens Axboe --- block/bio.c | 9 +++++++++ block/fops.c | 4 ++-- include/linux/blkdev.h | 5 +++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index ee5fe1bb015e..933ea3210954 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1220,7 +1220,16 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); + /* + * Each segment in the iov is required to be a block size multiple. + * However, we may not be able to get the entire segment if it spans + * more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the + * result to ensure the bio's total size is correct. The remainder of + * the iov data will be picked up in the next bio iteration. + */ size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); + if (size > 0) + size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev)); if (unlikely(size <= 0)) return size ? size : -EFAULT; diff --git a/block/fops.c b/block/fops.c index 9d32df6fc315..86d3cab9bf93 100644 --- a/block/fops.c +++ b/block/fops.c @@ -45,8 +45,8 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb) static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos, struct iov_iter *iter) { - return ((pos | iov_iter_alignment(iter)) & - (bdev_logical_block_size(bdev) - 1)); + return pos & (bdev_logical_block_size(bdev) - 1) || + !bdev_iter_is_aligned(bdev, iter); } #define DIO_INLINE_BIO_VECS 4 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b8bc1fe0b2c..886c44e97434 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -424,6 +424,11 @@ struct request_queue { unsigned long nr_requests; /* Max # of requests */ unsigned int dma_pad_mask; + /* + * Drivers that set dma_alignment to less than 511 must be prepared to + * handle individual bvec's that are not a multiple of a SECTOR_SIZE + * due to possible offsets. + */ unsigned int dma_alignment; #ifdef CONFIG_BLK_INLINE_ENCRYPTION From bf8d08532bc19a14cfb54ae61099dccadefca446 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 10 Jun 2022 12:58:30 -0700 Subject: [PATCH 011/178] iomap: add support for dma aligned direct-io Use the address alignment requirements from the block_device for direct io instead of requiring addresses be aligned to the block size. 
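With this, O_DIRECT on iomap-based filesystems such as XFS accepts user
buffers aligned only to the device's dma_alignment, matching the raw block
device behavior introduced earlier in this series.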
Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220610195830.3574005-12-kbusch@fb.com Signed-off-by: Jens Axboe --- fs/iomap/direct-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 370c3241618a..5d098adba443 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -242,7 +242,6 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, struct inode *inode = iter->inode; unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); unsigned int fs_block_size = i_blocksize(inode), pad; - unsigned int align = iov_iter_alignment(dio->submit.iter); loff_t length = iomap_length(iter); loff_t pos = iter->pos; unsigned int bio_opf; @@ -253,7 +252,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, size_t copied = 0; size_t orig_count; - if ((pos | length | align) & ((1 << blkbits) - 1)) + if ((pos | length) & ((1 << blkbits) - 1) || + !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter)) return -EINVAL; if (iomap->type == IOMAP_UNWRITTEN) { From 798f2a6f734de87633351c3ab13b17b07397cf68 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 15 Jun 2022 04:18:16 -0400 Subject: [PATCH 012/178] block: Directly use ida_alloc()/free() Use ida_alloc()/ida_free() instead of ida_simple_get()/ida_simple_remove(). The latter is deprecated and more verbose. Signed-off-by: Bo Liu Reviewed-by: Christophe JAILLET Link: https://lore.kernel.org/r/20220615081816.4342-1-liubo03@inspur.com Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++-- block/blk-sysfs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 27fb1357ad4b..c2cec402d01c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -435,7 +435,7 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) q->last_merge = NULL; - q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); + q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL); if (q->id < 0) goto fail_srcu; @@ -485,7 +485,7 @@ fail_stats: fail_split: bioset_exit(&q->bio_split); fail_id: - ida_simple_remove(&blk_queue_ida, q->id); + ida_free(&blk_queue_ida, q->id); fail_srcu: if (alloc_srcu) cleanup_srcu_struct(q->srcu); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ec716ea26b92..69e53d1a4f0e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -791,7 +791,7 @@ static void blk_release_queue(struct kobject *kobj) if (blk_queue_has_srcu(q)) cleanup_srcu_struct(q->srcu); - ida_simple_remove(&blk_queue_ida, q->id); + ida_free(&blk_queue_ida, q->id); call_rcu(&q->rcu_head, blk_free_queue_rcu); } From 62c159a03da92121b1b909fd8039028de97885fc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 15 Jun 2022 15:55:47 -0700 Subject: [PATCH 013/178] blk-iocost: Simplify ioc_rqos_done() Leave out the superfluous "& REQ_OP_MASK" code. The definition of req_op() shows that that code is superfluous: #define req_op(req) ((req)->cmd_flags & REQ_OP_MASK) Compile-tested only. 
Cc: Tejun Heo Acked-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220615225549.1054905-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 33a11ba971ea..b7082f2aed9c 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2769,7 +2769,7 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) return; - switch (req_op(rq) & REQ_OP_MASK) { + switch (req_op(rq)) { case REQ_OP_READ: pidx = QOS_RLAT; rw = READ; From 7e923f40a4d229bd9573d6cae64479d6a9770fc3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 15 Jun 2022 15:55:48 -0700 Subject: [PATCH 014/178] block: Rename a blk_mq_map_queue() argument Before the introduction of blk_mq_get_hctx_type(), blk_mq_map_queue() only used the flags from its second argument. Since the introduction of blk_mq_get_hctx_type(), blk_mq_map_queue() uses both the operation and the flags encoded in that argument. Rename the second argument of blk_mq_map_queue() to make this clear. Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220615225549.1054905-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-mq.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index 2615bd58bad3..e4c6fe2c8ac8 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -86,16 +86,16 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue * return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]); } -static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) +static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf) { enum hctx_type type = HCTX_TYPE_DEFAULT; /* * The caller ensure that if REQ_POLLED, poll must be enabled. */ - if (flags & REQ_POLLED) + if (opf & REQ_POLLED) type = HCTX_TYPE_POLL; - else if ((flags & REQ_OP_MASK) == REQ_OP_READ) + else if ((opf & REQ_OP_MASK) == REQ_OP_READ) type = HCTX_TYPE_READ; return type; } @@ -103,14 +103,14 @@ static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags) /* * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue * @q: request queue - * @flags: request command flags + * @opf: operation type (REQ_OP_*) and flags (e.g. REQ_POLLED). * @ctx: software queue cpu ctx */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, - unsigned int flags, + unsigned int opf, struct blk_mq_ctx *ctx) { - return ctx->hctxs[blk_mq_get_hctx_type(flags)]; + return ctx->hctxs[blk_mq_get_hctx_type(opf)]; } /* From 51ab80f0aa861335eb80327af53e444a27e824b8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 15 Jun 2022 15:55:49 -0700 Subject: [PATCH 015/178] block: Make blk_mq_get_sq_hctx() select the proper hardware queue type Since the introduction of blk_mq_get_hctx_type() the operation type in the second argument of blk_mq_get_hctx_type() matters. The introduction of blk_mq_get_hctx_type() caused blk_mq_get_sq_hctx() to select a hardware queue of type HCTX_TYPE_READ instead of HCTX_TYPE_DEFAULT. Switch to hardware queue type HCTX_TYPE_DEFAULT since HCTX_TYPE_READ should only be used for read requests. 
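The underlying reason is that REQ_OP_READ is zero: the zero flags argument
that blk_mq_get_sq_hctx() used to pass to blk_mq_map_queue() decodes as a
read operation and hence selects the read queue type.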
Cc: Ming Lei Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220615225549.1054905-4-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 93d9d60980fb..fa3dc4f8f35d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2156,7 +2156,7 @@ static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) * just causes lock contention inside the scheduler and pointless cache * bouncing. */ - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, 0, ctx); + struct blk_mq_hw_ctx *hctx = ctx->hctxs[HCTX_TYPE_DEFAULT]; if (!blk_mq_hctx_stopped(hctx)) return hctx; From 8689461be3f1ce6686bc26f1f379790bb0fc7a8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:29 +0200 Subject: [PATCH 016/178] block: factor out a chunk_size_left helper Factor out a helper from blk_max_size_offset so that it can be reused independently. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Pankaj Raghav Link: https://lore.kernel.org/r/20220614090934.570632-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 886c44e97434..283961257cc9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -934,6 +934,17 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, return q->limits.max_sectors; } +/* + * Return how much of the chunk is left to be used for I/O at a given offset. + */ +static inline unsigned int blk_chunk_sectors_left(sector_t offset, + unsigned int chunk_sectors) +{ + if (unlikely(!is_power_of_2(chunk_sectors))) + return chunk_sectors - sector_div(offset, chunk_sectors); + return chunk_sectors - (offset & (chunk_sectors - 1)); +} + /* * Return maximum size of a request at given offset. Only valid for * file system requests. @@ -949,12 +960,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, return q->limits.max_sectors; } - if (likely(is_power_of_2(chunk_sectors))) - chunk_sectors -= offset & (chunk_sectors - 1); - else - chunk_sectors -= sector_div(offset, chunk_sectors); - - return min(q->limits.max_sectors, chunk_sectors); + return min(q->limits.max_sectors, + blk_chunk_sectors_left(offset, chunk_sectors)); } /* From c39493222e41098cd15d11f972d16e943919506d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:30 +0200 Subject: [PATCH 017/178] dm: open code blk_max_size_offset in max_io_len max_io_len always passes an explicitly non-zero chunk_sectors into blk_max_size_offset. That means much of blk_max_size_offset is not needed and can be open coded to simplify the code. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20220614090934.570632-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2b75f1ef7386..4c04a980fcd9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1086,23 +1086,18 @@ static sector_t max_io_len(struct dm_target *ti, sector_t sector) { sector_t target_offset = dm_target_offset(ti, sector); sector_t len = max_io_len_target_boundary(ti, target_offset); - sector_t max_len; /* * Does the target need to split IO even further? 
	 * - varied (per target) IO splitting is a tenet of DM; this
	 * explains why stacked chunk_sectors based splitting via
-	 * blk_max_size_offset() isn't possible here. So pass in
-	 * ti->max_io_len to override stacked chunk_sectors.
+	 * blk_queue_split() isn't possible here.
 	 */
-	if (ti->max_io_len) {
-		max_len = blk_max_size_offset(ti->table->md->queue,
-				target_offset, ti->max_io_len);
-		if (len > max_len)
-			len = max_len;
-	}
-
-	return len;
+	if (!ti->max_io_len)
+		return len;
+	return min_t(sector_t, len,
+		min(queue_max_sectors(ti->table->md->queue),
+		    blk_chunk_sectors_left(target_offset, ti->max_io_len)));
 }
 
 int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)

From c887519074957f0ebd73b8158c2e0546d97ce0e8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 14 Jun 2022 11:09:31 +0200
Subject: [PATCH 018/178] block: open code blk_max_size_offset in
 blk_rq_get_max_sectors

blk_rq_get_max_sectors always uses q->limits.chunk_sectors as the
chunk_sectors argument, and already checks for max_sectors through the
call to blk_queue_get_max_sectors. That means much of
blk_max_size_offset is not needed and open coding it simplifies the
code.

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
Link: https://lore.kernel.org/r/20220614090934.570632-4-hch@lst.de
Signed-off-by: Jens Axboe
---
 block/blk-merge.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index db2e03c8af7f..df003ecfbd47 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -566,17 +566,18 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
 						  sector_t offset)
 {
 	struct request_queue *q = rq->q;
+	unsigned int max_sectors;
 
 	if (blk_rq_is_passthrough(rq))
 		return q->limits.max_hw_sectors;
 
+	max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
 	if (!q->limits.chunk_sectors ||
 	    req_op(rq) == REQ_OP_DISCARD ||
 	    req_op(rq) == REQ_OP_SECURE_ERASE)
-		return blk_queue_get_max_sectors(q, req_op(rq));
-
-	return min(blk_max_size_offset(q, offset, 0),
-		   blk_queue_get_max_sectors(q, req_op(rq)));
+		return max_sectors;
+	return min(max_sectors,
+		   blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
 }
 
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,

From 84613beda427b6e4c3d7a9ed2b68efd26300ec63 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 14 Jun 2022 11:09:32 +0200
Subject: [PATCH 019/178] block: cleanup variable naming in get_max_io_size

get_max_io_size has a very odd choice of variable names and
initialization patterns. Switch to more descriptive names and clearer
initialization of them.
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220614090934.570632-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-merge.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index df003ecfbd47..4da981efddee 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -164,18 +164,16 @@ static struct bio *blk_bio_write_zeroes_split(struct request_queue *q, static inline unsigned get_max_io_size(struct request_queue *q, struct bio *bio) { - unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0); - unsigned max_sectors = sectors; unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT; unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT; - unsigned start_offset = bio->bi_iter.bi_sector & (pbs - 1); + unsigned max_sectors, start, end; - max_sectors += start_offset; - max_sectors &= ~(pbs - 1); - if (max_sectors > start_offset) - return max_sectors - start_offset; - - return sectors & ~(lbs - 1); + max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0); + start = bio->bi_iter.bi_sector & (pbs - 1); + end = (start + max_sectors) & ~(pbs - 1); + if (end > start) + return end - start; + return max_sectors & ~(lbs - 1); } static inline unsigned get_max_segment_size(const struct request_queue *q, From efef739d5f37dc998b113fb965aea68d42a9eddc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Jun 2022 11:09:33 +0200 Subject: [PATCH 020/178] block: fold blk_max_size_offset into get_max_io_size Now that blk_max_size_offset has a single caller left, fold it into that and clean up the naming convention for the local variables there. Signed-off-by: Christoph Hellwig Reviewed-by: Pankaj Raghav Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220614090934.570632-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-merge.c | 9 +++++++-- include/linux/blkdev.h | 19 ------------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 4da981efddee..0f5f42ebd0bb 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -166,9 +166,14 @@ static inline unsigned get_max_io_size(struct request_queue *q, { unsigned pbs = queue_physical_block_size(q) >> SECTOR_SHIFT; unsigned lbs = queue_logical_block_size(q) >> SECTOR_SHIFT; - unsigned max_sectors, start, end; + unsigned max_sectors = queue_max_sectors(q), start, end; + + if (q->limits.chunk_sectors) { + max_sectors = min(max_sectors, + blk_chunk_sectors_left(bio->bi_iter.bi_sector, + q->limits.chunk_sectors)); + } - max_sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector, 0); start = bio->bi_iter.bi_sector & (pbs - 1); end = (start + max_sectors) & ~(pbs - 1); if (end > start) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 283961257cc9..652c357dafb9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -945,25 +945,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, return chunk_sectors - (offset & (chunk_sectors - 1)); } -/* - * Return maximum size of a request at given offset. Only valid for - * file system requests. 
- */
-static inline unsigned int blk_max_size_offset(struct request_queue *q,
-					       sector_t offset,
-					       unsigned int chunk_sectors)
-{
-	if (!chunk_sectors) {
-		if (q->limits.chunk_sectors)
-			chunk_sectors = q->limits.chunk_sectors;
-		else
-			return q->limits.max_sectors;
-	}
-
-	return min(q->limits.max_sectors,
-		   blk_chunk_sectors_left(offset, chunk_sectors));
-}
-
 /*
  * Access functions for manipulating queue properties
  */

From 2a9336c42a6abdcef564d522648a684a474a3483 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 14 Jun 2022 11:09:34 +0200
Subject: [PATCH 021/178] block: move blk_queue_get_max_sectors to blk.h

blk_queue_get_max_sectors is private to the block layer, so move it
out of blkdev.h.

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
Link: https://lore.kernel.org/r/20220614090934.570632-7-hch@lst.de
Signed-off-by: Jens Axboe
---
 block/blk.h            | 13 +++++++++++++
 include/linux/blkdev.h | 13 -------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blk.h b/block/blk.h
index 434017701403..8e79296ee97a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -159,6 +159,19 @@ static inline bool blk_discard_mergable(struct request *req)
 	return false;
 }
 
+static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
+						     int op)
+{
+	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
+		return min(q->limits.max_discard_sectors,
+			   UINT_MAX >> SECTOR_SHIFT);
+
+	if (unlikely(op == REQ_OP_WRITE_ZEROES))
+		return q->limits.max_write_zeroes_sectors;
+
+	return q->limits.max_sectors;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 652c357dafb9..b2d42201bd5d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -921,19 +921,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     int op)
-{
-	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
-		return min(q->limits.max_discard_sectors,
-			   UINT_MAX >> SECTOR_SHIFT);
-
-	if (unlikely(op == REQ_OP_WRITE_ZEROES))
-		return q->limits.max_write_zeroes_sectors;
-
-	return q->limits.max_sectors;
-}
-
 /*
  * Return how much of the chunk is left to be used for I/O at a given offset.
  */

From 6c77b152f5f1324972cdbdb71e9a6e02d601f49f Mon Sep 17 00:00:00 2001
From: GuoYong Zheng
Date: Fri, 17 Jun 2022 18:28:04 +0800
Subject: [PATCH 022/178] bfq: Remove useless code in bfq_lookup_next_entity

There is no need to check whether entity is NULL here, as returning
entity directly gives the same result, so remove the check.

Signed-off-by: GuoYong Zheng
Link: https://lore.kernel.org/r/1655461684-19075-1-git-send-email-zhenggy@chinatelecom.cn
Signed-off-by: Jens Axboe
---
 block/bfq-wf2q.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index f8eb340381cf..089d07022066 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1472,9 +1472,6 @@ static struct bfq_entity *bfq_lookup_next_entity(struct bfq_sched_data *sd,
 		break;
 	}
 
-	if (!entity)
-		return NULL;
-
 	return entity;
 }

From c28c49b09e493adf5f79201d6de2d16d9356e9eb Mon Sep 17 00:00:00 2001
From: Bart Van Assche
Date: Fri, 17 Jun 2022 13:44:33 -0700
Subject: [PATCH 023/178] block: bfq: Remove an unused function definition

This patch is the result of the analysis of a sparse report.
Cc: Jan Kara Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220617204433.102022-1-bvanassche@acm.org --- block/bfq-cgroup.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 09574af83566..dc0fa93219df 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1471,8 +1471,6 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) return bfqq->bfqd->root_group; } -void bfqg_and_blkg_get(struct bfq_group *bfqg) {} - void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) From 1d87be8212c8c2bb1216a0ba49373e4e0123aaf3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 17 Jun 2022 14:08:59 -0700 Subject: [PATCH 024/178] block: bfq: Fix kernel-doc headers Fix the following warnings: block/bfq-cgroup.c:721: warning: Function parameter or member 'bfqg' not described in '__bfq_bic_change_cgroup' block/bfq-cgroup.c:721: warning: Excess function parameter 'blkcg' description in '__bfq_bic_change_cgroup' block/bfq-cgroup.c:870: warning: Function parameter or member 'ioprio_class' not described in 'bfq_reparent_leaf_entity' block/bfq-cgroup.c:900: warning: Function parameter or member 'ioprio_class' not described in 'bfq_reparent_active_queues' Cc: Jan Kara Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/20220617210859.106623-1-bvanassche@acm.org --- block/bfq-cgroup.c | 6 ++++-- block/bfq-wf2q.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index dc0fa93219df..9fc605791b1e 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -706,10 +706,10 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, } /** - * __bfq_bic_change_cgroup - move @bic to @cgroup. + * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. - * @blkcg: the blk-cgroup to move to. + * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see @@ -863,6 +863,7 @@ static void bfq_flush_idle_tree(struct bfq_service_tree *st) * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. + * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, @@ -892,6 +893,7 @@ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. + * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 089d07022066..983413cdefad 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -1360,6 +1360,8 @@ left: /** * __bfq_lookup_next_entity - return the first eligible entity in @st. * @st: the service tree. + * @in_service: whether or not there is an in-service entity for the sched_data + * this active tree belongs to. 
 *
 * If there is no in-service entity for the sched_data st belongs to,
 * then return the entity that will be set in service if:

From 3c8f9da41ed90294d8ca42b3ad8a13c5379bd549 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior
Date: Wed, 22 Jun 2022 10:25:54 +0200
Subject: [PATCH 025/178] blk-mq: Don't disable preemption around __blk_mq_run_hw_queue().

__blk_mq_delay_run_hw_queue() disables preemption to get a stable
current CPU number and then invokes __blk_mq_run_hw_queue() if the CPU
number is part of the mask.

__blk_mq_run_hw_queue() acquires a spin_lock_t which is a sleeping lock
on PREEMPT_RT and can't be acquired with disabled preemption.

It is not required for correctness to invoke __blk_mq_run_hw_queue() on
a CPU matching hctx->cpumask. Both (async and direct requests) can run
on a CPU not matching hctx->cpumask.

Check the CPU mask without disabling preemption and invoke
__blk_mq_run_hw_queue().

Signed-off-by: Sebastian Andrzej Siewior
Reviewed-by: Ming Lei
Link: https://lore.kernel.org/r/YrLSEiNvagKJaDs5@linutronix.de
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index fa3dc4f8f35d..62b7025d6854 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2085,14 +2085,10 @@ static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
 		return;
 
 	if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) {
-		int cpu = get_cpu();
-		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+		if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) {
 			__blk_mq_run_hw_queue(hctx);
-			put_cpu();
 			return;
 		}
-
-		put_cpu();
 	}
 
 	kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work,

From e589f46445960c274cc813a1cc8e2fc73b2a1849 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Thu, 23 Jun 2022 09:48:26 +0200
Subject: [PATCH 026/178] block: fix default IO priority handling again

Commit e70344c05995 ("block: fix default IO priority handling")
introduced an inconsistency in get_current_ioprio() in that tasks
without an IO context return IOPRIO_DEFAULT priority while tasks with a
freshly allocated IO context return 0 (IOPRIO_CLASS_NONE/0) IO
priority. Tasks without an IO context used to be rare before
5a9d041ba2f6 ("block: move io_context creation into where it's needed")
but after this commit they became common because now only the BFQ IO
scheduler sets up the task's IO context. A similar inconsistency exists
for get_task_ioprio(), so the inconsistency is now exposed to userspace
and userspace will see different IO priority for tasks operating on
devices with BFQ compared to devices without BFQ.

Furthermore the changes done by commit e70344c05995 change the behavior
when no IO priority is set for the BFQ IO scheduler, which is also
documented in the ioprio_set(2) manpage:

"If no I/O scheduler has been set for a thread, then by default the I/O
priority will follow the CPU nice value (setpriority(2)). In Linux
kernels before version 2.6.24, once an I/O priority had been set using
ioprio_set(), there was no way to reset the I/O scheduling behavior to
the default. Since Linux 2.6.24, specifying ioprio as 0 can be used to
reset to the default I/O scheduling behavior."

So make sure we default to IOPRIO_CLASS_NONE as was the case before
commit e70344c05995. Also clean up alloc_io_context() to explicitly set
this IO priority for the allocated IO context to avoid future
surprises. Note that we tweak ioprio_best() to maintain ioprio_get(2)
behavior and make this commit easily backportable.
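For reference (an illustrative mapping derived from task_nice_ioclass()
and task_nice_ioprio() as used later in this series): with the
IOPRIO_CLASS_NONE default in place, a normally scheduled task with nice
value 10 gets the effective priority
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, (10 + 20) / 5), i.e. best-effort
level 6.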
CC: stable@vger.kernel.org Fixes: e70344c05995 ("block: fix default IO priority handling") Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-1-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-ioc.c | 2 ++ block/ioprio.c | 4 ++-- include/linux/ioprio.h | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/block/blk-ioc.c b/block/blk-ioc.c index df9cfe4ca532..63fc02042408 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -247,6 +247,8 @@ static struct io_context *alloc_io_context(gfp_t gfp_flags, int node) INIT_HLIST_HEAD(&ioc->icq_list); INIT_WORK(&ioc->release_work, ioc_release_fn); #endif + ioc->ioprio = IOPRIO_DEFAULT; + return ioc; } diff --git a/block/ioprio.c b/block/ioprio.c index 2fe068fcaad5..2a34cbca18ae 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -157,9 +157,9 @@ out: int ioprio_best(unsigned short aprio, unsigned short bprio) { if (!ioprio_valid(aprio)) - aprio = IOPRIO_DEFAULT; + aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); if (!ioprio_valid(bprio)) - bprio = IOPRIO_DEFAULT; + bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); return min(aprio, bprio); } diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3f53bc27a19b..3d088a88f832 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -11,7 +11,7 @@ /* * Default IO priority. */ -#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0) /* * Check that a priority value has a valid class. From f7eda402878b12bc0884c5bc1192a9e76ad121fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:27 +0200 Subject: [PATCH 027/178] block: Return effective IO priority from get_current_ioprio() get_current_ioprio() is used to initialize IO priority of various requests. As such it should be returning the effective IO priority of the task (i.e., reflecting the fact that unset IO priority should get set based on task's CPU priority) so that the conversion is concentrated in one place. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-2-jack@suse.cz Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 3d088a88f832..61ed6bb4998e 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -53,10 +53,17 @@ static inline int task_nice_ioclass(struct task_struct *task) static inline int get_current_ioprio(void) { struct io_context *ioc = current->io_context; + int prio; if (ioc) - return ioc->ioprio; - return IOPRIO_DEFAULT; + prio = ioc->ioprio; + else + prio = IOPRIO_DEFAULT; + + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), + task_nice_ioprio(current)); + return prio; } /* From 893e5d32d5832674bcf6465f27958e883b72b346 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:28 +0200 Subject: [PATCH 028/178] block: Generalize get_current_ioprio() for any task get_current_ioprio() operates only on current task. We will need the same functionality for other tasks as well. Generalize get_current_ioprio() for that and also move the bulk out of the header file because it is large enough. 
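For callers other than current, the expected usage looks like this
(illustrative sketch; the same pattern appears in get_task_ioprio()
later in this series):

	int prio;

	task_lock(p);
	prio = __get_task_ioprio(p);
	task_unlock(p);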
Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-3-jack@suse.cz Signed-off-by: Jens Axboe --- block/ioprio.c | 26 ++++++++++++++++++++++++++ include/linux/ioprio.h | 26 ++++++++++---------------- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index 2a34cbca18ae..c4e3476155a1 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -138,6 +138,32 @@ out: return ret; } +/* + * If the task has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + * + * Expected to be called for current task or with task_lock() held to keep + * io_context stable. + */ +int __get_task_ioprio(struct task_struct *p) +{ + struct io_context *ioc = p->io_context; + int prio; + + if (p != current) + lockdep_assert_held(&p->alloc_lock); + if (ioc) + prio = ioc->ioprio; + else + prio = IOPRIO_DEFAULT; + + if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) + prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p), + task_nice_ioprio(p)); + return prio; +} +EXPORT_SYMBOL_GPL(__get_task_ioprio); + static int get_task_ioprio(struct task_struct *p) { int ret; diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 61ed6bb4998e..9752cf4a9c7c 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -46,24 +46,18 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } -/* - * If the calling process has set an I/O priority, use that. Otherwise, return - * the default I/O priority. - */ +#ifdef CONFIG_BLOCK +int __get_task_ioprio(struct task_struct *p); +#else +static inline int __get_task_ioprio(struct task_struct *p) +{ + return IOPRIO_DEFAULT; +} +#endif /* CONFIG_BLOCK */ + static inline int get_current_ioprio(void) { - struct io_context *ioc = current->io_context; - int prio; - - if (ioc) - prio = ioc->ioprio; - else - prio = IOPRIO_DEFAULT; - - if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE) - prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), - task_nice_ioprio(current)); - return prio; + return __get_task_ioprio(current); } /* From fc25545e17bd74befe0b8ab2c65ac84936be5066 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:29 +0200 Subject: [PATCH 029/178] block: Make ioprio_best() static Nobody outside of block/ioprio.c uses it. 
Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-4-jack@suse.cz Signed-off-by: Jens Axboe --- block/ioprio.c | 2 +- include/linux/ioprio.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index c4e3476155a1..8c46f672a0ba 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -180,7 +180,7 @@ out: return ret; } -int ioprio_best(unsigned short aprio, unsigned short bprio) +static int ioprio_best(unsigned short aprio, unsigned short bprio) { if (!ioprio_valid(aprio)) aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9752cf4a9c7c..7578d4f6a969 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -60,11 +60,6 @@ static inline int get_current_ioprio(void) return __get_task_ioprio(current); } -/* - * For inheritance, return the highest of the two given priorities - */ -extern int ioprio_best(unsigned short aprio, unsigned short bprio); - extern int set_task_ioprio(struct task_struct *task, int ioprio); #ifdef CONFIG_BLOCK From 4b838d9ee950b37bee624e301bd8e923165b1cf3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:30 +0200 Subject: [PATCH 030/178] block: Fix handling of tasks without ioprio in ioprio_get(2) ioprio_get(2) can be asked to return the best IO priority from several tasks (IOPRIO_WHO_PGRP, IOPRIO_WHO_USER). Currently the call treats tasks without set IO priority as having priority IOPRIO_CLASS_BE/IOPRIO_BE_NORM however this does not really reflect the IO priority the task will get (which depends on task's nice value). Fix the code to use the real IO priority task's IO will use. We have to modify code for ioprio_get(IOPRIO_WHO_PROCESS) to keep returning IOPRIO_CLASS_NONE priority for tasks without set IO priority as a special case to maintain userspace visible API. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20220623074840.5960-5-jack@suse.cz Signed-off-by: Jens Axboe --- block/ioprio.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/block/ioprio.c b/block/ioprio.c index 8c46f672a0ba..32a456b45804 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -171,10 +171,31 @@ static int get_task_ioprio(struct task_struct *p) ret = security_task_getioprio(p); if (ret) goto out; - ret = IOPRIO_DEFAULT; + task_lock(p); + ret = __get_task_ioprio(p); + task_unlock(p); +out: + return ret; +} + +/* + * Return raw IO priority value as set by userspace. We use this for + * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and + * also so that userspace can distinguish unset IO priority (which just gets + * overriden based on task's nice value) from IO priority set to some value. 
+ */ +static int get_task_raw_ioprio(struct task_struct *p) +{ + int ret; + + ret = security_task_getioprio(p); + if (ret) + goto out; task_lock(p); if (p->io_context) ret = p->io_context->ioprio; + else + ret = IOPRIO_DEFAULT; task_unlock(p); out: return ret; @@ -182,11 +203,6 @@ out: static int ioprio_best(unsigned short aprio, unsigned short bprio) { - if (!ioprio_valid(aprio)) - aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); - if (!ioprio_valid(bprio)) - bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM); - return min(aprio, bprio); } @@ -207,7 +223,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else p = find_task_by_vpid(who); if (p) - ret = get_task_ioprio(p); + ret = get_task_raw_ioprio(p); break; case IOPRIO_WHO_PGRP: if (!who) From f25865447294bf2468c2587dd98f8fa999260893 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:31 +0200 Subject: [PATCH 031/178] blk-ioprio: Remove unneeded field blkcg->ioprio_set field is not really useful except for avoiding possibly more expensive checks inside blkcg_ioprio_track(). The check for blkcg->prio_policy being equal to POLICY_NO_CHANGE does the same service so just remove the ioprio_set field and replace the check. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-6-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-ioprio.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c index 79e797f5d194..3f605583598b 100644 --- a/block/blk-ioprio.c +++ b/block/blk-ioprio.c @@ -62,7 +62,6 @@ struct ioprio_blkg { struct ioprio_blkcg { struct blkcg_policy_data cpd; enum prio_policy prio_policy; - bool prio_set; }; static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd) @@ -113,7 +112,6 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf, if (ret < 0) return ret; blkcg->prio_policy = ret; - blkcg->prio_set = true; return nbytes; } @@ -193,16 +191,15 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq, struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio); u16 prio; - if (!blkcg->prio_set) + if (blkcg->prio_policy == POLICY_NO_CHANGE) return; /* * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers * correspond to a lower priority. Hence, the max_t() below selects * the lower priority of bi_ioprio and the cgroup I/O priority class. - * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the - * bio I/O priority is not modified. If the bio I/O priority equals - * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio. + * If the bio I/O priority equals IOPRIO_CLASS_NONE, the cgroup I/O + * priority is assigned to the bio. */ prio = max_t(u16, bio->bi_ioprio, IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0)); From 82b74cac28493fb40ea74fb2fe648b5fc7ea0c1c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:32 +0200 Subject: [PATCH 032/178] blk-ioprio: Convert from rqos policy to direct call Convert blk-ioprio handling from a rqos policy to a direct call from blk_mq_submit_bio(). Firstly, blk-ioprio is not much of a rqos policy anyway, it just needs a hook in bio submission path to set the bio's IO priority. Secondly, the rqos .track hook gets actually called too late for blk-ioprio purposes and introducing a special rqos hook just for blk-ioprio looks even weirder. 
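[Editor's illustration] A condensed, kernel-context sketch of the shape change, with signatures taken from the diff below: the rq-qos framework only invoked its ->track hook once a request had already been allocated for the bio, while the direct call tags the bio itself and can be placed wherever bio submission needs it (the following patches in this series move it ahead of merging).

	/* old: hook invoked by the rq-qos core, request already allocated */
	static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
				       struct bio *bio);

	/* new: one plain function call on the bio from blk_mq_submit_bio() */
	void blkcg_set_ioprio(struct bio *bio);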
Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-7-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 1 + block/blk-ioprio.c | 50 +++++----------------------------------------- block/blk-ioprio.h | 9 +++++++++ block/blk-mq.c | 8 ++++++++ 4 files changed, 23 insertions(+), 45 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 764e740b0c0f..6906981563f8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1299,6 +1299,7 @@ int blkcg_init_queue(struct request_queue *q) ret = blk_iolatency_init(q); if (ret) { blk_throtl_exit(q); + blk_ioprio_exit(q); goto err_destroy_all; } diff --git a/block/blk-ioprio.c b/block/blk-ioprio.c index 3f605583598b..c00060a02c6e 100644 --- a/block/blk-ioprio.c +++ b/block/blk-ioprio.c @@ -181,17 +181,12 @@ static struct blkcg_policy ioprio_policy = { .pd_free_fn = ioprio_free_pd, }; -struct blk_ioprio { - struct rq_qos rqos; -}; - -static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq, - struct bio *bio) +void blkcg_set_ioprio(struct bio *bio) { struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio); u16 prio; - if (blkcg->prio_policy == POLICY_NO_CHANGE) + if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE) return; /* @@ -207,49 +202,14 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq, bio->bi_ioprio = prio; } -static void blkcg_ioprio_exit(struct rq_qos *rqos) +void blk_ioprio_exit(struct request_queue *q) { - struct blk_ioprio *blkioprio_blkg = - container_of(rqos, typeof(*blkioprio_blkg), rqos); - - blkcg_deactivate_policy(rqos->q, &ioprio_policy); - kfree(blkioprio_blkg); + blkcg_deactivate_policy(q, &ioprio_policy); } -static struct rq_qos_ops blkcg_ioprio_ops = { - .track = blkcg_ioprio_track, - .exit = blkcg_ioprio_exit, -}; - int blk_ioprio_init(struct request_queue *q) { - struct blk_ioprio *blkioprio_blkg; - struct rq_qos *rqos; - int ret; - - blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL); - if (!blkioprio_blkg) - return -ENOMEM; - - ret = blkcg_activate_policy(q, &ioprio_policy); - if (ret) { - kfree(blkioprio_blkg); - return ret; - } - - rqos = &blkioprio_blkg->rqos; - rqos->id = RQ_QOS_IOPRIO; - rqos->ops = &blkcg_ioprio_ops; - rqos->q = q; - - /* - * Registering the rq-qos policy after activating the blk-cgroup - * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the - * rq-qos callbacks. 
- */ - rq_qos_add(q, rqos); - - return 0; + return blkcg_activate_policy(q, &ioprio_policy); } static int __init ioprio_init(void) diff --git a/block/blk-ioprio.h b/block/blk-ioprio.h index a7785c2f1aea..5a1eb550e178 100644 --- a/block/blk-ioprio.h +++ b/block/blk-ioprio.h @@ -6,14 +6,23 @@ #include struct request_queue; +struct bio; #ifdef CONFIG_BLK_CGROUP_IOPRIO int blk_ioprio_init(struct request_queue *q); +void blk_ioprio_exit(struct request_queue *q); +void blkcg_set_ioprio(struct bio *bio); #else static inline int blk_ioprio_init(struct request_queue *q) { return 0; } +static inline void blk_ioprio_exit(struct request_queue *q) +{ +} +static inline void blkcg_set_ioprio(struct bio *bio) +{ +} #endif #endif /* _BLK_IOPRIO_H_ */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 62b7025d6854..4c4944b6f520 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -42,6 +42,7 @@ #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" +#include "blk-ioprio.h" static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); @@ -2779,6 +2780,11 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, return rq; } +static void bio_set_ioprio(struct bio *bio) +{ + blkcg_set_ioprio(bio); +} + /** * blk_mq_submit_bio - Create and send a request to block device. * @bio: Bio pointer. @@ -2819,6 +2825,8 @@ void blk_mq_submit_bio(struct bio *bio) trace_block_getrq(bio); + bio_set_ioprio(bio); + rq_qos_track(q, rq, bio); blk_mq_bio_to_request(rq, bio, nr_segs); From 9c6227e04355a430aa59709bbf869d9126112d0d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:33 +0200 Subject: [PATCH 033/178] block: Initialize bio priority earlier Bio's IO priority needs to be initialized before we try to merge the bio with other bios. Otherwise we could merge bios which would otherwise receive different IO priorities leading to possible QoS issues. Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-8-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c4944b6f520..c0ec1938feee 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2814,6 +2814,8 @@ void blk_mq_submit_bio(struct bio *bio) if (!bio_integrity_prep(bio)) return; + bio_set_ioprio(bio); + rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs); if (!rq) { if (!bio) @@ -2825,8 +2827,6 @@ void blk_mq_submit_bio(struct bio *bio) trace_block_getrq(bio); - bio_set_ioprio(bio); - rq_qos_track(q, rq, bio); blk_mq_bio_to_request(rq, bio, nr_segs); From a78418e6a04c93b9ffd3f0f601c5cb10612acb7f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Jun 2022 09:48:34 +0200 Subject: [PATCH 034/178] block: Always initialize bio IO priority on submit Currently, IO priority set in task's IO context is not reflected in the bio->bi_ioprio for most IO (only io_uring and direct IO set it). This results in odd results where process is submitting some bios with one priority and other bios with a different (unset) priority and due to differing priorities bios cannot be merged. Make sure bio->bi_ioprio is always set on bio submission. 
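[Editor's illustration] To make the user-visible effect concrete, a small user-space sketch (the file name is hypothetical; ioprio_set(2) is issued as a raw syscall because glibc provides no wrapper, and the macro values mirror the uapi header): bios submitted synchronously from the task, such as for a plain read, now inherit the task's BE/7 priority, whereas before this patch they were left at IOPRIO_CLASS_NONE unless the I/O went through io_uring or direct I/O.

	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/syscall.h>

	#define IOPRIO_CLASS_SHIFT	13
	#define IOPRIO_PRIO_VALUE(c, d)	(((c) << IOPRIO_CLASS_SHIFT) | (d))
	#define IOPRIO_CLASS_BE		2
	#define IOPRIO_WHO_PROCESS	1

	int main(void)
	{
		char buf[4096];
		int fd = open("datafile", O_RDONLY);

		syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 7));
		/* read bios submitted in this task's context now carry BE/7
		 * instead of an unset IOPRIO_CLASS_NONE */
		read(fd, buf, sizeof(buf));
		return 0;
	}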
Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220623074840.5960-9-jack@suse.cz Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index c0ec1938feee..92aae03103b7 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2782,6 +2782,9 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, static void bio_set_ioprio(struct bio *bio) { + /* Nobody set ioprio so far? Initialize it based on task's nice value */ + if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) == IOPRIO_CLASS_NONE) + bio->bi_ioprio = get_current_ioprio(); blkcg_set_ioprio(bio); } From ee78ec1077d37d1a4a0860589a65df8ae6d2255c Mon Sep 17 00:00:00 2001 From: Liu Song Date: Sat, 25 Jun 2022 23:15:21 +0800 Subject: [PATCH 035/178] blk-mq: blk_mq_tag_busy is no need to return a value Currently "blk_mq_tag_busy" return value has no effect, so adjust it. Some code implementations have also been adjusted to enhance readability. Signed-off-by: Liu Song Link: https://lore.kernel.org/r/1656170121-1619-1-git-send-email-liusong@linux.alibaba.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 18 +++++++----------- block/blk-mq-tag.h | 10 ++++------ 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 2dcd738c6952..3cfffef1feb3 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -37,29 +37,25 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, * to get tag when first time, the other shared-tag users could reserve * budget for it. */ -bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) +void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; if (blk_mq_is_shared_tags(hctx->flags)) { struct request_queue *q = hctx->queue; - if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) || - test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) { - return true; - } + if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) + return; + set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags); } else { - if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) || - test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) { - return true; - } + if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) + return; + set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state); } users = atomic_inc_return(&hctx->tags->active_queues); blk_mq_update_wake_batch(hctx->tags, users); - - return true; } /* diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 5668e28be0b7..91ff37e3b43d 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -47,15 +47,13 @@ enum { BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1, }; -extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); +extern void __blk_mq_tag_busy(struct blk_mq_hw_ctx *); extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); -static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) +static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { - if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) - return false; - - return __blk_mq_tag_busy(hctx); + if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) + __blk_mq_tag_busy(hctx); } static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) From ec5263f422a3364442e0db2d9c2866d9154cbcc4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:47 +0200 Subject: [PATCH 036/178] mtip32xx: remove the device_status debugfs file This file is a huge mess that iterates over all devices and is in the way of fixing the 
device removal in this driver, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 141 +----------------------------- drivers/block/mtip32xx/mtip32xx.h | 4 - 2 files changed, 1 insertion(+), 144 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 27386a572ba4..4151c80f5bfc 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -94,17 +94,12 @@ /* Device instance number, incremented each time a device is probed. */ static int instance; -static LIST_HEAD(online_list); -static LIST_HEAD(removing_list); -static DEFINE_SPINLOCK(dev_lock); - /* * Global variable used to hold the major block device number * allocated in mtip_init(). */ static int mtip_major; static struct dentry *dfs_parent; -static struct dentry *dfs_device_status; static u32 cpu_use[NR_CPUS]; @@ -2170,106 +2165,6 @@ static const struct attribute_group *mtip_disk_attr_groups[] = { NULL, }; -/* debugsfs entries */ - -static ssize_t show_device_status(struct device_driver *drv, char *buf) -{ - int size = 0; - struct driver_data *dd, *tmp; - unsigned long flags; - char id_buf[42]; - u16 status = 0; - - spin_lock_irqsave(&dev_lock, flags); - size += sprintf(&buf[size], "Devices Present:\n"); - list_for_each_entry_safe(dd, tmp, &online_list, online_list) { - if (dd->pdev) { - if (dd->port && - dd->port->identify && - dd->port->identify_valid) { - strlcpy(id_buf, - (char *) (dd->port->identify + 10), 21); - status = *(dd->port->identify + 141); - } else { - memset(id_buf, 0, 42); - status = 0; - } - - if (dd->port && - test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { - size += sprintf(&buf[size], - " device %s %s (ftl rebuild %d %%)\n", - dev_name(&dd->pdev->dev), - id_buf, - status); - } else { - size += sprintf(&buf[size], - " device %s %s\n", - dev_name(&dd->pdev->dev), - id_buf); - } - } - } - - size += sprintf(&buf[size], "Devices Being Removed:\n"); - list_for_each_entry_safe(dd, tmp, &removing_list, remove_list) { - if (dd->pdev) { - if (dd->port && - dd->port->identify && - dd->port->identify_valid) { - strlcpy(id_buf, - (char *) (dd->port->identify+10), 21); - status = *(dd->port->identify + 141); - } else { - memset(id_buf, 0, 42); - status = 0; - } - - if (dd->port && - test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags)) { - size += sprintf(&buf[size], - " device %s %s (ftl rebuild %d %%)\n", - dev_name(&dd->pdev->dev), - id_buf, - status); - } else { - size += sprintf(&buf[size], - " device %s %s\n", - dev_name(&dd->pdev->dev), - id_buf); - } - } - } - spin_unlock_irqrestore(&dev_lock, flags); - - return size; -} - -static ssize_t mtip_hw_read_device_status(struct file *f, char __user *ubuf, - size_t len, loff_t *offset) -{ - int size = *offset; - char *buf; - int rv = 0; - - if (!len || *offset) - return 0; - - buf = kzalloc(MTIP_DFS_MAX_BUF_SIZE, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - size += show_device_status(NULL, buf); - - *offset = size <= len ? size : len; - size = copy_to_user(ubuf, buf, *offset); - if (size) - rv = -EFAULT; - - kfree(buf); - return rv ? rv : *offset; -} - static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf, size_t len, loff_t *offset) { @@ -2363,13 +2258,6 @@ static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf, return rv ? 
rv : *offset; } -static const struct file_operations mtip_device_status_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = mtip_hw_read_device_status, - .llseek = no_llseek, -}; - static const struct file_operations mtip_regs_fops = { .owner = THIS_MODULE, .open = simple_open, @@ -3905,7 +3793,6 @@ static int mtip_pci_probe(struct pci_dev *pdev, const struct cpumask *node_mask; int cpu, i = 0, j = 0; int my_node = NUMA_NO_NODE; - unsigned long flags; /* Allocate memory for this devices private data. */ my_node = pcibus_to_node(pdev->bus); @@ -3952,9 +3839,6 @@ static int mtip_pci_probe(struct pci_dev *pdev, dd->pdev = pdev; dd->numa_node = my_node; - INIT_LIST_HEAD(&dd->online_list); - INIT_LIST_HEAD(&dd->remove_list); - memset(dd->workq_name, 0, 32); snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); @@ -4047,11 +3931,6 @@ static int mtip_pci_probe(struct pci_dev *pdev, else rv = 0; /* device in rebuild state, return 0 from probe */ - /* Add to online list even if in ftl rebuild */ - spin_lock_irqsave(&dev_lock, flags); - list_add(&dd->online_list, &online_list); - spin_unlock_irqrestore(&dev_lock, flags); - goto done; block_initialize_err: @@ -4085,15 +3964,10 @@ done: static void mtip_pci_remove(struct pci_dev *pdev) { struct driver_data *dd = pci_get_drvdata(pdev); - unsigned long flags, to; + unsigned long to; set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); - spin_lock_irqsave(&dev_lock, flags); - list_del_init(&dd->online_list); - list_add(&dd->remove_list, &removing_list); - spin_unlock_irqrestore(&dev_lock, flags); - mtip_check_surprise_removal(dd); synchronize_irq(dd->pdev->irq); @@ -4124,10 +3998,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); - spin_lock_irqsave(&dev_lock, flags); - list_del_init(&dd->remove_list); - spin_unlock_irqrestore(&dev_lock, flags); - kfree(dd); pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4250,15 +4120,6 @@ static int __init mtip_init(void) pr_warn("Error creating debugfs parent\n"); dfs_parent = NULL; } - if (dfs_parent) { - dfs_device_status = debugfs_create_file("device_status", - 0444, dfs_parent, NULL, - &mtip_device_status_fops); - if (IS_ERR_OR_NULL(dfs_device_status)) { - pr_err("Error creating device_status node\n"); - dfs_device_status = NULL; - } - } /* Register our PCI operations. */ error = pci_register_driver(&mtip_pci_driver); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 6816beb45352..a80419c57bbe 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -462,10 +462,6 @@ struct driver_data { int isr_binding; - struct list_head online_list; /* linkage for online list */ - - struct list_head remove_list; /* linkage for removing list */ - int unal_qdepth; /* qdepth of unaligned IO queue */ }; From e8b58ef09e84c15cf782b01cfc73cc5b1180d519 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:48 +0200 Subject: [PATCH 037/178] mtip32xx: fix device removal Use the proper helper to mark a surpise removal, remove the gendisk as soon as possible when removing the device and implement the ->free_disk callback to ensure the private data is alive as long as the gendisk has references. 
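[Editor's illustration] A generic driver-side sketch of the ->free_disk contract relied on here (the driver names are hypothetical): the callback runs from put_disk() only when the final gendisk reference drops, so driver-private data reached through disk->private_data stays valid for any lingering opener, and there is no need to police opens with open/release handlers and a removal flag.

	static void mydrv_free_disk(struct gendisk *disk)
	{
		/* last reference is gone; no opener can reach this anymore */
		kfree(disk->private_data);
	}

	static const struct block_device_operations mydrv_ops = {
		.owner		= THIS_MODULE,
		.free_disk	= mydrv_free_disk,
	};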
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/mtip32xx/mtip32xx.c | 157 +++++++++--------------------- drivers/block/mtip32xx/mtip32xx.h | 1 - 2 files changed, 44 insertions(+), 114 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 4151c80f5bfc..e7604b3bf8a7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -141,11 +141,8 @@ static bool mtip_check_surprise_removal(struct driver_data *dd) pci_read_config_word(dd->pdev, 0x00, &vendor_id); if (vendor_id == 0xFFFF) { dd->sr = true; - if (dd->queue) - blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue); - else - dev_warn(&dd->pdev->dev, - "%s: dd->queue is NULL\n", __func__); + if (dd->disk) + blk_mark_disk_dead(dd->disk); return true; /* device removed */ } @@ -3185,26 +3182,12 @@ static int mtip_block_getgeo(struct block_device *dev, return 0; } -static int mtip_block_open(struct block_device *dev, fmode_t mode) +static void mtip_block_free_disk(struct gendisk *disk) { - struct driver_data *dd; + struct driver_data *dd = disk->private_data; - if (dev && dev->bd_disk) { - dd = (struct driver_data *) dev->bd_disk->private_data; - - if (dd) { - if (test_bit(MTIP_DDF_REMOVAL_BIT, - &dd->dd_flag)) { - return -ENODEV; - } - return 0; - } - } - return -ENODEV; -} - -static void mtip_block_release(struct gendisk *disk, fmode_t mode) -{ + ida_free(&rssd_index_ida, dd->index); + kfree(dd); } /* @@ -3214,13 +3197,12 @@ static void mtip_block_release(struct gendisk *disk, fmode_t mode) * layer. */ static const struct block_device_operations mtip_block_ops = { - .open = mtip_block_open, - .release = mtip_block_release, .ioctl = mtip_block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mtip_block_compat_ioctl, #endif .getgeo = mtip_block_getgeo, + .free_disk = mtip_block_free_disk, .owner = THIS_MODULE }; @@ -3561,72 +3543,6 @@ protocol_init_error: return rv; } -static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) -{ - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - - cmd->status = BLK_STS_IOERR; - blk_mq_complete_request(rq); - return true; -} - -/* - * Block layer deinitialization function. - * - * Called by the PCI layer as each P320 device is removed. - * - * @dd Pointer to the driver data structure. - * - * return value - * 0 - */ -static int mtip_block_remove(struct driver_data *dd) -{ - mtip_hw_debugfs_exit(dd); - - if (dd->mtip_svc_handler) { - set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - kthread_stop(dd->mtip_svc_handler); - } - - if (!dd->sr) { - /* - * Explicitly wait here for IOs to quiesce, - * as mtip_standby_drive usually won't wait for IOs. - */ - if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) - mtip_standby_drive(dd); - } - else - dev_info(&dd->pdev->dev, "device %s surprise removal\n", - dd->disk->disk_name); - - blk_freeze_queue_start(dd->queue); - blk_mq_quiesce_queue(dd->queue); - blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); - blk_mq_unquiesce_queue(dd->queue); - - if (dd->disk) { - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) - del_gendisk(dd->disk); - if (dd->disk->queue) { - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } - put_disk(dd->disk); - } - dd->disk = NULL; - - ida_free(&rssd_index_ida, dd->index); - - /* De-initialize the protocol layer. 
*/ - mtip_hw_exit(dd); - - return 0; -} - /* * Function called by the PCI layer when just before the * machine shuts down. @@ -3643,23 +3559,15 @@ static int mtip_block_shutdown(struct driver_data *dd) { mtip_hw_shutdown(dd); - /* Delete our gendisk structure, and cleanup the blk queue. */ - if (dd->disk) { - dev_info(&dd->pdev->dev, - "Shutting down %s ...\n", dd->disk->disk_name); + dev_info(&dd->pdev->dev, + "Shutting down %s ...\n", dd->disk->disk_name); - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) - del_gendisk(dd->disk); - if (dd->disk->queue) { - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - } - put_disk(dd->disk); - dd->disk = NULL; - dd->queue = NULL; - } + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); - ida_free(&rssd_index_ida, dd->index); + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + put_disk(dd->disk); return 0; } @@ -3966,8 +3874,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) struct driver_data *dd = pci_get_drvdata(pdev); unsigned long to; - set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); - mtip_check_surprise_removal(dd); synchronize_irq(dd->pdev->irq); @@ -3983,11 +3889,36 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } - blk_mark_disk_dead(dd->disk); set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); - /* Clean up the block layer. */ - mtip_block_remove(dd); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); + + mtip_hw_debugfs_exit(dd); + + if (dd->mtip_svc_handler) { + set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); + wake_up_interruptible(&dd->port->svc_wait); + kthread_stop(dd->mtip_svc_handler); + } + + if (!dd->sr) { + /* + * Explicitly wait here for IOs to quiesce, + * as mtip_standby_drive usually won't wait for IOs. + */ + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) + mtip_standby_drive(dd); + } + else + dev_info(&dd->pdev->dev, "device %s surprise removal\n", + dd->disk->disk_name); + + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + + /* De-initialize the protocol layer. */ + mtip_hw_exit(dd); if (dd->isr_workq) { destroy_workqueue(dd->isr_workq); @@ -3998,10 +3929,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); - kfree(dd); - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); + + put_disk(dd->disk); } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index a80419c57bbe..f7328f19ac5c 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -149,7 +149,6 @@ enum { MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, MTIP_DDF_REBUILD_FAILED_BIT = 8, - MTIP_DDF_REMOVAL_BIT = 9, MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | (1 << MTIP_DDF_SEC_LOCK_BIT) | From 1f90307e5f0d7bc9a336ead528f616a5df8e5944 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:49 +0200 Subject: [PATCH 038/178] block: remove QUEUE_FLAG_DEAD Disallow setting the blk-mq state on any queue that is already dying as setting the state even then is a bad idea, and remove the now unused QUEUE_FLAG_DEAD flag. 
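[Editor's illustration] With the DEAD stage gone, teardown state checks collapse onto the DYING flag; a condensed sketch of the check as it now appears in the debugfs "state" attribute below:

	/* previously: if (blk_queue_dead(q)) ... once teardown had finished */
	if (blk_queue_dying(q))		/* queue is being (or has been) torn down */
		return -ENOENT;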
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- block/blk-mq-debugfs.c | 8 +++----- include/linux/blkdev.h | 2 -- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index c2cec402d01c..f86df390afad 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -313,9 +313,6 @@ void blk_cleanup_queue(struct request_queue *q) * after draining finished. */ blk_freeze_queue(q); - - blk_queue_flag_set(QUEUE_FLAG_DEAD, q); - blk_sync_queue(q); if (queue_is_mq(q)) { blk_mq_cancel_work_sync(q); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4d1ce9ef4318..b80fae7ab1d9 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -116,7 +116,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(NOXMERGES), QUEUE_FLAG_NAME(ADD_RANDOM), QUEUE_FLAG_NAME(SAME_FORCE), - QUEUE_FLAG_NAME(DEAD), QUEUE_FLAG_NAME(INIT_DONE), QUEUE_FLAG_NAME(STABLE_WRITES), QUEUE_FLAG_NAME(POLL), @@ -151,11 +150,10 @@ static ssize_t queue_state_write(void *data, const char __user *buf, char opbuf[16] = { }, *op; /* - * The "state" attribute is removed after blk_cleanup_queue() has called - * blk_mq_free_queue(). Return if QUEUE_FLAG_DEAD has been set to avoid - * triggering a use-after-free. + * The "state" attribute is removed when the queue is removed. Don't + * allow setting the state on a dying queue to avoid a use-after-free. */ - if (blk_queue_dead(q)) + if (blk_queue_dying(q)) return -ENOENT; if (count >= sizeof(opbuf)) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b2d42201bd5d..f4632f4fe884 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -564,7 +564,6 @@ struct request_queue { #define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */ #define QUEUE_FLAG_SAME_FORCE 12 /* force complete on same CPU */ -#define QUEUE_FLAG_DEAD 13 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 14 /* queue is initialized */ #define QUEUE_FLAG_STABLE_WRITES 15 /* don't modify blks until WB is done */ #define QUEUE_FLAG_POLL 16 /* IO polling enabled if set */ @@ -592,7 +591,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) -#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ From 0e3534022f26ae51f7cf28347a253230604b6f4e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:50 +0200 Subject: [PATCH 039/178] block: stop setting the nomerges flags in blk_cleanup_queue These flags only apply to file system I/O, and all file system I/O is already drained by del_gendisk and thus can't be in progress when blk_cleanup_queue is called. 
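[Editor's illustration] A condensed sketch of the teardown ordering this change depends on (a generic driver removal path, not code from this patch): by the time blk_cleanup_queue() runs, del_gendisk() has already frozen and drained all file system I/O, so disabling merges at that point had nothing left to act on.

	del_gendisk(disk);		/* fails new fs I/O and drains the rest */
	blk_cleanup_queue(disk->queue);	/* only passthrough requests remain */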
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index f86df390afad..04029ffea031 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -304,9 +304,6 @@ void blk_cleanup_queue(struct request_queue *q) blk_queue_flag_set(QUEUE_FLAG_DYING, q); blk_queue_start_drain(q); - blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); - blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); - /* * Drain all requests queued before DYING marking. Set DEAD flag to * prevent that blk_mq_run_hw_queues() accesses the hardware queues From 6f8191fdf41d3a53cc1d63fe2234e812c55a0092 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:51 +0200 Subject: [PATCH 040/178] block: simplify disk shutdown Set the queue dying flag and call blk_mq_exit_queue from del_gendisk for all disks that do not have separately allocated queues, and thus remove the need to call blk_cleanup_queue for them. Rename blk_cleanup_disk to blk_mq_destroy_queue to make it clear that this function is intended only for separately allocated blk-mq queues. This saves an extra queue freeze for devices without a separately allocated queue. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 37 ------------------------- block/blk-mq.c | 43 +++++++++++++++++++++++++++-- block/blk-sysfs.c | 5 ---- block/blk.h | 3 ++ block/bsg-lib.c | 4 +-- block/genhd.c | 23 ++++++++------- drivers/block/ataflop.c | 1 - drivers/block/loop.c | 1 - drivers/block/mtip32xx/mtip32xx.c | 2 -- drivers/block/rnbd/rnbd-clt.c | 2 +- drivers/block/sx8.c | 4 +-- drivers/block/virtio_blk.c | 1 - drivers/block/z2ram.c | 1 - drivers/cdrom/gdrom.c | 1 - drivers/memstick/core/ms_block.c | 1 - drivers/memstick/core/mspro_block.c | 1 - drivers/mmc/core/block.c | 1 - drivers/mmc/core/queue.c | 1 - drivers/nvme/host/apple.c | 2 +- drivers/nvme/host/core.c | 1 - drivers/nvme/host/fc.c | 12 ++++---- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 12 ++++---- drivers/nvme/host/tcp.c | 12 ++++---- drivers/nvme/target/loop.c | 12 ++++---- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dasd_genhd.c | 4 +-- drivers/scsi/scsi_lib.c | 6 ++-- drivers/scsi/scsi_sysfs.c | 2 +- drivers/scsi/sd.c | 4 +-- drivers/scsi/sr.c | 4 +-- drivers/ufs/core/ufshcd.c | 4 +-- include/linux/blk-mq.h | 3 ++ include/linux/blkdev.h | 4 +-- 34 files changed, 105 insertions(+), 113 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 04029ffea031..5ad7bd93077c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -284,43 +284,6 @@ void blk_queue_start_drain(struct request_queue *q) wake_up_all(&q->mq_freeze_wq); } -/** - * blk_cleanup_queue - shutdown a request queue - * @q: request queue to shutdown - * - * Mark @q DYING, drain all pending requests, mark @q DEAD, destroy and - * put it. All future requests will be failed immediately with -ENODEV. - * - * Context: can sleep - */ -void blk_cleanup_queue(struct request_queue *q) -{ - /* cannot be called from atomic context */ - might_sleep(); - - WARN_ON_ONCE(blk_queue_registered(q)); - - /* mark @q DYING, no new request or merges will be allowed afterwards */ - blk_queue_flag_set(QUEUE_FLAG_DYING, q); - blk_queue_start_drain(q); - - /* - * Drain all requests queued before DYING marking. 
Set DEAD flag to - * prevent that blk_mq_run_hw_queues() accesses the hardware queues - * after draining finished. - */ - blk_freeze_queue(q); - blk_sync_queue(q); - if (queue_is_mq(q)) { - blk_mq_cancel_work_sync(q); - blk_mq_exit_queue(q); - } - - /* @q is and will stay empty, shutdown and put */ - blk_put_queue(q); -} -EXPORT_SYMBOL(blk_cleanup_queue); - /** * blk_queue_enter() - try to increase q->q_usage_counter * @q: request queue pointer diff --git a/block/blk-mq.c b/block/blk-mq.c index 92aae03103b7..b1dbc4b2c2c9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3902,7 +3902,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set, q->queuedata = queuedata; ret = blk_mq_init_allocated_queue(set, q); if (ret) { - blk_cleanup_queue(q); + blk_put_queue(q); return ERR_PTR(ret); } return q; @@ -3914,6 +3914,35 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) } EXPORT_SYMBOL(blk_mq_init_queue); +/** + * blk_mq_destroy_queue - shutdown a request queue + * @q: request queue to shutdown + * + * This shuts down a request queue allocated by blk_mq_init_queue() and drops + * the initial reference. All future requests will failed with -ENODEV. + * + * Context: can sleep + */ +void blk_mq_destroy_queue(struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_mq(q)); + WARN_ON_ONCE(blk_queue_registered(q)); + + might_sleep(); + + blk_queue_flag_set(QUEUE_FLAG_DYING, q); + blk_queue_start_drain(q); + blk_freeze_queue(q); + + blk_sync_queue(q); + blk_mq_cancel_work_sync(q); + blk_mq_exit_queue(q); + + /* @q is and will stay empty, shutdown and put */ + blk_put_queue(q); +} +EXPORT_SYMBOL(blk_mq_destroy_queue); + struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, struct lock_class_key *lkclass) { @@ -3926,13 +3955,23 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, disk = __alloc_disk_node(q, set->numa_node, lkclass); if (!disk) { - blk_cleanup_queue(q); + blk_put_queue(q); return ERR_PTR(-ENOMEM); } + set_bit(GD_OWNS_QUEUE, &disk->state); return disk; } EXPORT_SYMBOL(__blk_mq_alloc_disk); +struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, + struct lock_class_key *lkclass) +{ + if (!blk_get_queue(q)) + return NULL; + return __alloc_disk_node(q, NUMA_NO_NODE, lkclass); +} +EXPORT_SYMBOL(blk_mq_alloc_disk_for_queue); + static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( struct blk_mq_tag_set *set, struct request_queue *q, int hctx_idx, int node) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 69e53d1a4f0e..9b211e519de8 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -755,11 +755,6 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head) * decremented with blk_put_queue(). Once the refcount reaches 0 this function * is called. * - * For drivers that have a request_queue on a gendisk and added with - * __device_add_disk() the refcount to request_queue will reach 0 with - * the last put_disk() called by the driver. For drivers which don't use - * __device_add_disk() this happens with blk_cleanup_queue(). - * * Drivers exist which depend on the release of the request_queue to be * synchronous, it should not be deferred. 
* diff --git a/block/blk.h b/block/blk.h index 8e79296ee97a..1a0d3e6a4a63 100644 --- a/block/blk.h +++ b/block/blk.h @@ -424,6 +424,9 @@ int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); void blk_drop_partitions(struct gendisk *disk); +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); + int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); diff --git a/block/bsg-lib.c b/block/bsg-lib.c index acfe1357bf6c..fd4cd5e68282 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -324,7 +324,7 @@ void bsg_remove_queue(struct request_queue *q) container_of(q->tag_set, struct bsg_set, tag_set); bsg_unregister_queue(bset->bd); - blk_cleanup_queue(q); + blk_mq_destroy_queue(q); blk_mq_free_tag_set(&bset->tag_set); kfree(bset); } @@ -399,7 +399,7 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name, return q; out_cleanup_queue: - blk_cleanup_queue(q); + blk_mq_destroy_queue(q); out_queue: blk_mq_free_tag_set(set); out_tag_set: diff --git a/block/genhd.c b/block/genhd.c index 278227ba1d53..4d15f828c449 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -617,6 +617,8 @@ void del_gendisk(struct gendisk *disk) * Fail any new I/O. */ set_bit(GD_DEAD, &disk->state); + if (test_bit(GD_OWNS_QUEUE, &disk->state)) + blk_queue_flag_set(QUEUE_FLAG_DYING, q); set_capacity(disk, 0); /* @@ -663,11 +665,16 @@ void del_gendisk(struct gendisk *disk) blk_mq_unquiesce_queue(q); /* - * Allow using passthrough request again after the queue is torn down. + * If the disk does not own the queue, allow using passthrough requests + * again. Else leave the queue frozen to fail all I/O. 
*/ - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); - __blk_mq_unfreeze_queue(q, true); - + if (!test_bit(GD_OWNS_QUEUE, &disk->state)) { + blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + __blk_mq_unfreeze_queue(q, true); + } else { + if (queue_is_mq(q)) + blk_mq_exit_queue(q); + } } EXPORT_SYMBOL(del_gendisk); @@ -1338,9 +1345,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, { struct gendisk *disk; - if (!blk_get_queue(q)) - return NULL; - disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (!disk) goto out_put_queue; @@ -1391,7 +1395,6 @@ out_put_queue: blk_put_queue(q); return NULL; } -EXPORT_SYMBOL(__alloc_disk_node); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) { @@ -1404,9 +1407,10 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass) disk = __alloc_disk_node(q, node, lkclass); if (!disk) { - blk_cleanup_queue(q); + blk_put_queue(q); return NULL; } + set_bit(GD_OWNS_QUEUE, &disk->state); return disk; } EXPORT_SYMBOL(__blk_alloc_disk); @@ -1439,7 +1443,6 @@ EXPORT_SYMBOL(put_disk); */ void blk_cleanup_disk(struct gendisk *disk) { - blk_cleanup_queue(disk->queue); put_disk(disk); } EXPORT_SYMBOL(blk_cleanup_disk); diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index e232cc4fd444..c6e41ee18aaa 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2045,7 +2045,6 @@ static void atari_floppy_cleanup(void) if (!unit[i].disk[type]) continue; del_gendisk(unit[i].disk[type]); - blk_cleanup_queue(unit[i].disk[type]->queue); put_disk(unit[i].disk[type]); } blk_mq_free_tag_set(&unit[i].tag_set); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 084f9b8a0ba3..cc608226c8c7 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2057,7 +2057,6 @@ static void loop_remove(struct loop_device *lo) { /* Make this loop device unreachable from pathname. */ del_gendisk(lo->lo_disk); - blk_cleanup_queue(lo->lo_disk->queue); blk_mq_free_tag_set(&lo->tag_set); mutex_lock(&loop_ctl_mutex); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e7604b3bf8a7..1d0e0a9fdd7c 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3565,7 +3565,6 @@ static int mtip_block_shutdown(struct driver_data *dd) if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) del_gendisk(dd->disk); - blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); put_disk(dd->disk); return 0; @@ -3914,7 +3913,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) dev_info(&dd->pdev->dev, "device %s surprise removal\n", dd->disk->disk_name); - blk_cleanup_queue(dd->queue); blk_mq_free_tag_set(&dd->tags); /* De-initialize the protocol layer. */ diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 409c76b81aed..a4470374f54f 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1755,7 +1755,7 @@ static void rnbd_destroy_sessions(void) list_for_each_entry_safe(dev, tn, &sess->devs_list, list) { /* * Here unmap happens in parallel for only one reason: - * blk_cleanup_queue() takes around half a second, so + * del_gendisk() takes around half a second, so * on huge amount of devices the whole module unload * procedure takes minutes. 
*/ diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 63b4f6431d2e..75057dbbcfbe 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1536,7 +1536,7 @@ err_out_free_majors: clear_bit(0, &carm_major_alloc); else if (host->major == 161) clear_bit(1, &carm_major_alloc); - blk_cleanup_queue(host->oob_q); + blk_mq_destroy_queue(host->oob_q); blk_mq_free_tag_set(&host->tag_set); err_out_dma_free: dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); @@ -1570,7 +1570,7 @@ static void carm_remove_one (struct pci_dev *pdev) clear_bit(0, &carm_major_alloc); else if (host->major == 161) clear_bit(1, &carm_major_alloc); - blk_cleanup_queue(host->oob_q); + blk_mq_destroy_queue(host->oob_q); blk_mq_free_tag_set(&host->tag_set); dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma); iounmap(host->mmio); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6fc7850c2b0a..cff1b6f6b054 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1111,7 +1111,6 @@ static void virtblk_remove(struct virtio_device *vdev) flush_work(&vblk->config_work); del_gendisk(vblk->disk); - blk_cleanup_queue(vblk->disk->queue); blk_mq_free_tag_set(&vblk->tag_set); mutex_lock(&vblk->vdev_mutex); diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 7a6ed83481b8..18ad43d9933e 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -384,7 +384,6 @@ static void __exit z2_exit(void) for (i = 0; i < Z2MINOR_COUNT; i++) { del_gendisk(z2ram_gendisk[i]); - blk_cleanup_queue(z2ram_gendisk[i]->queue); put_disk(z2ram_gendisk[i]); } blk_mq_free_tag_set(&tag_set); diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 8e78b37d0f6a..f4cc90ea6198 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -831,7 +831,6 @@ probe_fail_no_mem: static int remove_gdrom(struct platform_device *devptr) { - blk_cleanup_queue(gd.gdrom_rq); blk_mq_free_tag_set(&gd.tag_set); free_irq(HW_EVENT_GDROM_CMD, &gd); free_irq(HW_EVENT_GDROM_DMA, &gd); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 3993bdd4b519..ba7e7249a3db 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2187,7 +2187,6 @@ static void msb_remove(struct memstick_dev *card) /* Remove the disk */ del_gendisk(msb->disk); - blk_cleanup_queue(msb->queue); blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 725ba74ded30..72e91c06c618 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1294,7 +1294,6 @@ static void mspro_block_remove(struct memstick_dev *card) del_gendisk(msb->disk); dev_dbg(&card->dev, "mspro block remove\n"); - blk_cleanup_queue(msb->queue); blk_mq_free_tag_set(&msb->tag_set); msb->queue = NULL; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f4a1281658db..bda6c67ce93f 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2509,7 +2509,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, return md; err_cleanup_queue: - blk_cleanup_queue(md->disk->queue); blk_mq_free_tag_set(&md->queue.tag_set); err_kfree: kfree(md); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index fa5324ceeebe..f824cfdab75a 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -494,7 +494,6 @@ void mmc_cleanup_queue(struct mmc_queue *mq) if (blk_queue_quiesced(q)) 
blk_mq_unquiesce_queue(q); - blk_cleanup_queue(q); blk_mq_free_tag_set(&mq->tag_set); /* diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index d702d7d60235..2d23b7d41f7e 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1502,7 +1502,7 @@ static int apple_nvme_probe(struct platform_device *pdev) if (!blk_get_queue(anv->ctrl.admin_q)) { nvme_start_admin_queue(&anv->ctrl); - blk_cleanup_queue(anv->ctrl.admin_q); + blk_mq_destroy_queue(anv->ctrl.admin_q); anv->ctrl.admin_q = NULL; ret = -ENODEV; goto put_dev; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b3d9c29aba1e..4e3a0f7bfc9c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4103,7 +4103,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); - blk_cleanup_queue(ns->queue); down_write(&ns->ctrl->namespaces_rwsem); list_del_init(&ns->list); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3c778bb0c294..a96aa831684c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2392,7 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) unsigned long flags; if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); blk_mq_free_tag_set(&ctrl->tag_set); } @@ -2402,8 +2402,8 @@ nvme_fc_ctrl_free(struct kref *ref) spin_unlock_irqrestore(&ctrl->rport->lock, flags); nvme_start_admin_queue(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + blk_mq_destroy_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); kfree(ctrl->queues); @@ -2953,7 +2953,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); out_cleanup_blk_queue: - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); out_free_tag_set: blk_mq_free_tag_set(&ctrl->tag_set); nvme_fc_free_io_queues(ctrl); @@ -3642,9 +3642,9 @@ fail_ctrl: return ERR_PTR(-EIO); out_cleanup_admin_q: - blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.admin_q); out_cleanup_fabrics_q: - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); out_free_admin_tag_set: blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_queues: diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7b24ee17285..247a74aba336 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1760,7 +1760,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev) * queue to flush these to completion. 
*/ nvme_start_admin_queue(&dev->ctrl); - blk_cleanup_queue(dev->ctrl.admin_q); + blk_mq_destroy_queue(dev->ctrl.admin_q); blk_mq_free_tag_set(&dev->admin_tagset); } } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f2a5e1ea508a..0fb7c8e7ab0b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -840,8 +840,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { if (remove) { - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + blk_mq_destroy_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); } if (ctrl->async_event_sqe.data) { @@ -935,10 +935,10 @@ out_stop_queue: nvme_cancel_admin_tagset(&ctrl->ctrl); out_cleanup_queue: if (new) - blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.admin_q); out_cleanup_fabrics_q: if (new) - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); out_free_tagset: if (new) blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); @@ -957,7 +957,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { if (remove) { - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); blk_mq_free_tag_set(ctrl->ctrl.tagset); } nvme_rdma_free_io_queues(ctrl); @@ -1012,7 +1012,7 @@ out_wait_freeze_timed_out: out_cleanup_connect_q: nvme_cancel_tagset(&ctrl->ctrl); if (new) - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); out_free_tag_set: if (new) blk_mq_free_tag_set(ctrl->ctrl.tagset); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index bb67538d241b..b81942fa5f95 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1885,7 +1885,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); if (remove) { - blk_cleanup_queue(ctrl->connect_q); + blk_mq_destroy_queue(ctrl->connect_q); blk_mq_free_tag_set(ctrl->tagset); } nvme_tcp_free_io_queues(ctrl); @@ -1940,7 +1940,7 @@ out_wait_freeze_timed_out: out_cleanup_connect_q: nvme_cancel_tagset(ctrl); if (new) - blk_cleanup_queue(ctrl->connect_q); + blk_mq_destroy_queue(ctrl->connect_q); out_free_tag_set: if (new) blk_mq_free_tag_set(ctrl->tagset); @@ -1953,8 +1953,8 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); if (remove) { - blk_cleanup_queue(ctrl->admin_q); - blk_cleanup_queue(ctrl->fabrics_q); + blk_mq_destroy_queue(ctrl->admin_q); + blk_mq_destroy_queue(ctrl->fabrics_q); blk_mq_free_tag_set(ctrl->admin_tagset); } nvme_tcp_free_admin_queue(ctrl); @@ -2012,10 +2012,10 @@ out_stop_queue: nvme_cancel_admin_tagset(ctrl); out_cleanup_queue: if (new) - blk_cleanup_queue(ctrl->admin_q); + blk_mq_destroy_queue(ctrl->admin_q); out_cleanup_fabrics_q: if (new) - blk_cleanup_queue(ctrl->fabrics_q); + blk_mq_destroy_queue(ctrl->fabrics_q); out_free_tagset: if (new) blk_mq_free_tag_set(ctrl->admin_tagset); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 59024af2da2e..0f5c77e22a0a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -266,8 +266,8 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) return; nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + 
blk_mq_destroy_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); blk_mq_free_tag_set(&ctrl->admin_tag_set); } @@ -283,7 +283,7 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) mutex_unlock(&nvme_loop_ctrl_mutex); if (nctrl->tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); blk_mq_free_tag_set(&ctrl->tag_set); } kfree(ctrl->queues); @@ -410,9 +410,9 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) out_cleanup_queue: clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); - blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_destroy_queue(ctrl->ctrl.admin_q); out_cleanup_fabrics_q: - blk_cleanup_queue(ctrl->ctrl.fabrics_q); + blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); out_free_tagset: blk_mq_free_tag_set(&ctrl->admin_tag_set); out_free_sq: @@ -554,7 +554,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) return 0; out_cleanup_connect_q: - blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_destroy_queue(ctrl->ctrl.connect_q); out_free_tagset: blk_mq_free_tag_set(&ctrl->tag_set); out_destroy_queues: diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index ba6d78789660..e8489331f12b 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3280,7 +3280,7 @@ static int dasd_alloc_queue(struct dasd_block *block) static void dasd_free_queue(struct dasd_block *block) { if (block->request_queue) { - blk_cleanup_queue(block->request_queue); + blk_mq_destroy_queue(block->request_queue); blk_mq_free_tag_set(&block->tag_set); block->request_queue = NULL; } diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index a7a33ebf4bbe..5a83f0a39901 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,8 +41,8 @@ int dasd_gendisk_alloc(struct dasd_block *block) if (base->devindex >= DASD_PER_MAJOR) return -EBUSY; - gdp = __alloc_disk_node(block->request_queue, NUMA_NO_NODE, - &dasd_bio_compl_lkclass); + gdp = blk_mq_alloc_disk_for_queue(block->request_queue, + &dasd_bio_compl_lkclass); if (!gdp) return -ENOMEM; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6ffc9e4258a8..cdf0056582d5 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -163,7 +163,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) * Requeue this command. It will go before all other commands * that are already in the queue. Schedule requeue work under * lock such that the kblockd_schedule_work() call happens - * before blk_cleanup_queue() finishes. + * before blk_mq_destroy_queue() finishes. */ cmd->result = 0; @@ -424,9 +424,9 @@ static void scsi_starved_list_run(struct Scsi_Host *shost) * it and the queue. Mitigate by taking a reference to the * queue and never touching the sdev again after we drop the * host lock. Note: if __scsi_remove_device() invokes - * blk_cleanup_queue() before the queue is run from this + * blk_mq_destroy_queue() before the queue is run from this * function then blk_run_queue() will return immediately since - * blk_cleanup_queue() marks the queue with QUEUE_FLAG_DYING. + * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING. 
*/ slq = sdev->request_queue; if (!blk_get_queue(slq)) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 43949798a2e4..aa70d9282161 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1475,7 +1475,7 @@ void __scsi_remove_device(struct scsi_device *sdev) scsi_device_set_state(sdev, SDEV_DEL); mutex_unlock(&sdev->state_mutex); - blk_cleanup_queue(sdev->request_queue); + blk_mq_destroy_queue(sdev->request_queue); cancel_work_sync(&sdev->requeue_work); if (sdev->host->hostt->slave_destroy) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a1a2ac09066f..cb587e488601 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3440,8 +3440,8 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = __alloc_disk_node(sdp->request_queue, NUMA_NO_NODE, - &sd_bio_compl_lkclass); + gd = blk_mq_alloc_disk_for_queue(sdp->request_queue, + &sd_bio_compl_lkclass); if (!gd) goto out_free; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 32d3b8274f14..a278b739d0c5 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -624,8 +624,8 @@ static int sr_probe(struct device *dev) if (!cd) goto fail; - disk = __alloc_disk_node(sdev->request_queue, NUMA_NO_NODE, - &sr_bio_compl_lkclass); + disk = blk_mq_alloc_disk_for_queue(sdev->request_queue, + &sr_bio_compl_lkclass); if (!disk) goto fail_free; mutex_init(&cd->lock); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ce86d1b790c0..91d8852daaa9 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9487,7 +9487,7 @@ void ufshcd_remove(struct ufs_hba *hba) ufs_bsg_remove(hba); ufshpb_remove(hba); ufs_sysfs_remove_nodes(hba->dev); - blk_cleanup_queue(hba->tmf_queue); + blk_mq_destroy_queue(hba->tmf_queue); blk_mq_free_tag_set(&hba->tmf_tag_set); scsi_remove_host(hba->host); /* disable interrupts */ @@ -9783,7 +9783,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) return 0; free_tmf_queue: - blk_cleanup_queue(hba->tmf_queue); + blk_mq_destroy_queue(hba->tmf_queue); free_tmf_tag_set: blk_mq_free_tag_set(&hba->tmf_tag_set); out_remove_scsi_host: diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e2d9daf7e8dd..0fd96e92c6c6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -686,10 +686,13 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, \ __blk_mq_alloc_disk(set, queuedata, &__key); \ }) +struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, + struct lock_class_key *lkclass); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); void blk_mq_unregister_dev(struct device *, struct request_queue *); +void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f4632f4fe884..530eeccffda3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -148,6 +148,7 @@ struct gendisk { #define GD_NATIVE_CAPACITY 3 #define GD_ADDED 4 #define GD_SUPPRESS_PART_SCAN 5 +#define GD_OWNS_QUEUE 6 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ @@ -815,8 +816,6 @@ static inline u64 sb_bdev_nr_blocks(struct super_block *sb) int bdev_disk_changed(struct gendisk *disk, bool invalidate); -struct gendisk *__alloc_disk_node(struct 
request_queue *q, int node_id, - struct lock_class_key *lkclass); void put_disk(struct gendisk *disk); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); @@ -933,7 +932,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, /* * Access functions for manipulating queue properties */ -extern void blk_cleanup_queue(struct request_queue *); void blk_queue_bounce_limit(struct request_queue *q, enum blk_bounce limit); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); From 8b9ab62662048a3274361c7e5f64037c2c133e2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Jun 2022 08:05:52 +0200 Subject: [PATCH 041/178] block: remove blk_cleanup_disk blk_cleanup_disk is nothing but a trivial wrapper for put_disk now, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20220619060552.1850436-7-hch@lst.de Signed-off-by: Jens Axboe --- arch/m68k/emu/nfblock.c | 4 ++-- arch/um/drivers/ubd_kern.c | 4 ++-- arch/xtensa/platforms/iss/simdisk.c | 4 ++-- block/genhd.c | 15 --------------- drivers/block/amiflop.c | 2 +- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/aoe/aoedev.c | 2 +- drivers/block/ataflop.c | 4 ++-- drivers/block/brd.c | 4 ++-- drivers/block/drbd/drbd_main.c | 4 ++-- drivers/block/floppy.c | 6 +++--- drivers/block/loop.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 2 +- drivers/block/n64cart.c | 2 +- drivers/block/nbd.c | 4 ++-- drivers/block/null_blk/main.c | 4 ++-- drivers/block/paride/pcd.c | 4 ++-- drivers/block/paride/pd.c | 4 ++-- drivers/block/paride/pf.c | 4 ++-- drivers/block/pktcdvd.c | 4 ++-- drivers/block/ps3disk.c | 4 ++-- drivers/block/ps3vram.c | 4 ++-- drivers/block/rbd.c | 2 +- drivers/block/rnbd/rnbd-clt.c | 4 ++-- drivers/block/sunvdc.c | 4 ++-- drivers/block/swim.c | 2 +- drivers/block/swim3.c | 2 +- drivers/block/sx8.c | 2 +- drivers/block/virtio_blk.c | 2 +- drivers/block/xen-blkfront.c | 4 ++-- drivers/block/z2ram.c | 2 +- drivers/block/zram/zram_drv.c | 4 ++-- drivers/cdrom/gdrom.c | 2 +- drivers/md/bcache/super.c | 2 +- drivers/md/dm.c | 2 +- drivers/md/md.c | 4 ++-- drivers/memstick/core/ms_block.c | 2 +- drivers/memstick/core/mspro_block.c | 2 +- drivers/mtd/mtd_blkdevs.c | 4 ++-- drivers/mtd/ubi/block.c | 4 ++-- drivers/nvdimm/btt.c | 4 ++-- drivers/nvdimm/pmem.c | 4 ++-- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/multipath.c | 2 +- drivers/s390/block/dcssblk.c | 8 ++++---- drivers/s390/block/scm_blk.c | 4 ++-- include/linux/blkdev.h | 1 - 47 files changed, 74 insertions(+), 90 deletions(-) diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 267b02cc5655..a708fbd5a844 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize) return 0; out_cleanup_disk: - blk_cleanup_disk(dev->disk); + put_disk(dev->disk); free_dev: kfree(dev); out: @@ -180,7 +180,7 @@ static void __exit nfhd_exit(void) list_for_each_entry_safe(dev, next, &nfhd_list, list) { list_del(&dev->list); del_gendisk(dev->disk); - blk_cleanup_disk(dev->disk); + put_disk(dev->disk); kfree(dev); } unregister_blkdev(major_num, "nfhd"); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index c4344b67628d..479b79e11442 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -925,7 +925,7 @@ static int ubd_add(int n, char **error_out) return 0; 
out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&ubd_dev->tag_set); out: @@ -1032,7 +1032,7 @@ static int ubd_remove(int n, char **error_out) ubd_gendisk[n] = NULL; if(disk != NULL){ del_gendisk(disk); - blk_cleanup_disk(disk); + put_disk(disk); } err = 0; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 4255b92fa3eb..f50caaa1c249 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -290,7 +290,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which, return 0; out_cleanup_disk: - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); out: return err; } @@ -344,7 +344,7 @@ static void simdisk_teardown(struct simdisk *dev, int which, simdisk_detach(dev); if (dev->gd) { del_gendisk(dev->gd); - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); } remove_proc_entry(tmp, procdir); } diff --git a/block/genhd.c b/block/genhd.c index 4d15f828c449..bf9be06af2c8 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1432,21 +1432,6 @@ void put_disk(struct gendisk *disk) } EXPORT_SYMBOL(put_disk); -/** - * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk - * @disk: gendisk to shutdown - * - * Mark the queue hanging off @disk DYING, drain all pending requests, then mark - * the queue DEAD, destroy and put it and the gendisk structure. - * - * Context: can sleep - */ -void blk_cleanup_disk(struct gendisk *disk) -{ - put_disk(disk); -} -EXPORT_SYMBOL(blk_cleanup_disk); - static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 5a566f2fd533..4c8b2ba579ee 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1802,7 +1802,7 @@ static int fd_alloc_disk(int drive, int system) unit[drive].gendisk[system] = disk; err = add_disk(disk); if (err) - blk_cleanup_disk(disk); + put_disk(disk); return err; } diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 348adf335217..12b3ca8f6f4a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -427,7 +427,7 @@ aoeblk_gdalloc(void *vp) return; out_disk_cleanup: - blk_cleanup_disk(gd); + put_disk(gd); err_tagset: blk_mq_free_tag_set(set); err_mempool: diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index b381d1c3ef32..3523dd82d7a0 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -277,7 +277,7 @@ freedev(struct aoedev *d) if (d->gd) { aoedisk_rm_debugfs(d); del_gendisk(d->gd); - blk_cleanup_disk(d->gd); + put_disk(d->gd); blk_mq_free_tag_set(&d->tag_set); } t = d->targets; diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index c6e41ee18aaa..9deb4df6bdb8 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c @@ -2031,7 +2031,7 @@ static void ataflop_probe(dev_t dev) return; cleanup_disk: - blk_cleanup_disk(unit[drive].disk[type]); + put_disk(unit[drive].disk[type]); unit[drive].disk[type] = NULL; } @@ -2063,7 +2063,7 @@ static void atari_cleanup_floppy_disk(struct atari_floppy_struct *fs) continue; if (fs->registered[type]) del_gendisk(fs->disk[type]); - blk_cleanup_disk(fs->disk[type]); + put_disk(fs->disk[type]); } blk_mq_free_tag_set(&fs->tag_set); } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 6e3f2f0d2352..9e26d5e769f3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -419,7 +419,7 @@ static int brd_alloc(int i) return 0; out_cleanup_disk: - 
blk_cleanup_disk(disk); + put_disk(disk); out_free_dev: list_del(&brd->brd_list); kfree(brd); @@ -439,7 +439,7 @@ static void brd_cleanup(void) list_for_each_entry_safe(brd, next, &brd_devices, brd_list) { del_gendisk(brd->brd_disk); - blk_cleanup_disk(brd->brd_disk); + put_disk(brd->brd_disk); brd_free_pages(brd); list_del(&brd->brd_list); kfree(brd); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2887350ae010..f3e4db16fd07 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2207,7 +2207,7 @@ void drbd_destroy_device(struct kref *kref) if (device->bitmap) /* should no longer be there. */ drbd_bm_cleanup(device); __free_page(device->md_io.page); - blk_cleanup_disk(device->vdisk); + put_disk(device->vdisk); kfree(device->rs_plan_s); /* not for_each_connection(connection, resource): @@ -2807,7 +2807,7 @@ out_no_minor_idr: out_no_bitmap: __free_page(device->md_io.page); out_no_io_page: - blk_cleanup_disk(disk); + put_disk(disk); out_no_disk: kref_put(&resource->kref, drbd_destroy_resource); kfree(device); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 015841f50f4e..491e7205a0db 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4557,7 +4557,7 @@ out: return; cleanup_disk: - blk_cleanup_disk(disks[drive][type]); + put_disk(disks[drive][type]); disks[drive][type] = NULL; mutex_unlock(&floppy_probe_lock); } @@ -4753,7 +4753,7 @@ out_put_disk: if (!disks[drive][0]) break; del_timer_sync(&motor_off_timer[drive]); - blk_cleanup_disk(disks[drive][0]); + put_disk(disks[drive][0]); blk_mq_free_tag_set(&tag_sets[drive]); } return err; @@ -4985,7 +4985,7 @@ static void __exit floppy_module_exit(void) } for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { if (disks[drive][i]) - blk_cleanup_disk(disks[drive][i]); + put_disk(disks[drive][i]); } blk_mq_free_tag_set(&tag_sets[drive]); } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cc608226c8c7..e3c0ba93c1a3 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2040,7 +2040,7 @@ static int loop_add(int i) return i; out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 1d0e0a9fdd7c..e116c6cf56f5 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3534,7 +3534,7 @@ init_hw_cmds_error: disk_index_error: ida_free(&rssd_index_ida, index); ida_get_error: - blk_cleanup_disk(dd->disk); + put_disk(dd->disk); block_queue_alloc_init_error: blk_mq_free_tag_set(&dd->tags); block_queue_alloc_tag_error: diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index e094d2b8b5a9..d914156db2d8 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -157,7 +157,7 @@ static int __init n64cart_probe(struct platform_device *pdev) return 0; out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out: return err; } diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 07f3c139a3d7..5c4c9c45c6ac 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -250,7 +250,7 @@ static void nbd_dev_remove(struct nbd_device *nbd) struct gendisk *disk = nbd->disk; del_gendisk(disk); - blk_cleanup_disk(disk); + put_disk(disk); blk_mq_free_tag_set(&nbd->tag_set); /* @@ -1833,7 +1833,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) out_free_work: destroy_workqueue(nbd->recv_workq); out_err_disk: - 
blk_cleanup_disk(disk); + put_disk(disk); out_free_idr: mutex_lock(&nbd_index_mutex); idr_remove(&nbd_index_idr, index); diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 6b67088f4ea7..d695ea29efa6 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1737,7 +1737,7 @@ static void null_del_dev(struct nullb *nullb) null_restart_queue_async(nullb); } - blk_cleanup_disk(nullb->disk); + put_disk(nullb->disk); if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); @@ -2082,7 +2082,7 @@ static int null_add_dev(struct nullb_device *dev) out_cleanup_zone: null_free_zoned_dev(dev); out_cleanup_disk: - blk_cleanup_disk(nullb->disk); + put_disk(nullb->disk); out_cleanup_tags: if (dev->queue_mode == NULL_Q_MQ && nullb->tag_set == &nullb->__tag_set) blk_mq_free_tag_set(nullb->tag_set); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index f462ad67931a..a5ab40784119 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -956,7 +956,7 @@ out_unreg_cdrom: out_pi_release: pi_release(cd->pi); out_free_disk: - blk_cleanup_disk(cd->disk); + put_disk(cd->disk); out_free_tag_set: blk_mq_free_tag_set(&cd->tag_set); return ret; @@ -1029,7 +1029,7 @@ static void __exit pcd_exit(void) unregister_cdrom(&cd->info); del_gendisk(cd->disk); pi_release(cd->pi); - blk_cleanup_disk(cd->disk); + put_disk(cd->disk); blk_mq_free_tag_set(&cd->tag_set); } diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 3637c38c72f9..c8c14c6f5c3a 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -943,7 +943,7 @@ static int pd_probe_drive(struct pd_unit *disk, int autoprobe, int port, goto cleanup_disk; return 0; cleanup_disk: - blk_cleanup_disk(disk->gd); + put_disk(disk->gd); put_disk: put_disk(p); disk->gd = NULL; @@ -1018,7 +1018,7 @@ static void __exit pd_exit(void) if (p) { disk->gd = NULL; del_gendisk(p); - blk_cleanup_disk(p); + put_disk(p); blk_mq_free_tag_set(&disk->tag_set); pi_release(disk->pi); } diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index 292e9a4ce1b9..eec1b9fde245 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -975,7 +975,7 @@ static int __init pf_init_unit(struct pf_unit *pf, bool autoprobe, int port, out_pi_release: pi_release(pf->pi); out_free_disk: - blk_cleanup_disk(pf->disk); + put_disk(pf->disk); out_free_tag_set: blk_mq_free_tag_set(&pf->tag_set); return ret; @@ -1044,7 +1044,7 @@ static void __exit pf_exit(void) if (!pf->present) continue; del_gendisk(pf->disk); - blk_cleanup_disk(pf->disk); + put_disk(pf->disk); blk_mq_free_tag_set(&pf->tag_set); pi_release(pf->pi); } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 789093375344..653d24231483 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2733,7 +2733,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) return 0; out_mem2: - blk_cleanup_disk(disk); + put_disk(disk); out_mem: mempool_exit(&pd->rb_pool); kfree(pd); @@ -2783,7 +2783,7 @@ static int pkt_remove_dev(dev_t pkt_dev) pkt_dbg(1, pd, "writer unmapped\n"); del_gendisk(pd->disk); - blk_cleanup_disk(pd->disk); + put_disk(pd->disk); mempool_exit(&pd->rb_pool); kfree(pd); diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 3054adf77460..36d7b36c60c7 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -473,7 +473,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev) 
return 0; fail_cleanup_disk: - blk_cleanup_disk(gendisk); + put_disk(gendisk); fail_free_tag_set: blk_mq_free_tag_set(&priv->tag_set); fail_teardown: @@ -500,7 +500,7 @@ static void ps3disk_remove(struct ps3_system_bus_device *_dev) &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); - blk_cleanup_disk(priv->gendisk); + put_disk(priv->gendisk); blk_mq_free_tag_set(&priv->tag_set); dev_notice(&dev->sbd.core, "Synchronizing disk cache\n"); ps3disk_sync_cache(dev); diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index 4f90819e245e..d1e0fefec90b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -761,7 +761,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev) return 0; out_cleanup_disk: - blk_cleanup_disk(gendisk); + put_disk(gendisk); out_cache_cleanup: remove_proc_entry(DEVICE_NAME, NULL); ps3vram_cache_cleanup(dev); @@ -792,7 +792,7 @@ static void ps3vram_remove(struct ps3_system_bus_device *dev) struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev); del_gendisk(priv->gendisk); - blk_cleanup_disk(priv->gendisk); + put_disk(priv->gendisk); remove_proc_entry(DEVICE_NAME, NULL); ps3vram_cache_cleanup(dev); iounmap(priv->reports); diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ef9bc62e9afd..0d8ec2fe5740 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4729,7 +4729,7 @@ static blk_status_t rbd_queue_rq(struct blk_mq_hw_ctx *hctx, static void rbd_free_disk(struct rbd_device *rbd_dev) { - blk_cleanup_disk(rbd_dev->disk); + put_disk(rbd_dev->disk); blk_mq_free_tag_set(&rbd_dev->tag_set); rbd_dev->disk = NULL; } diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index a4470374f54f..b8d9e2824d9c 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1408,7 +1408,7 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx) blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue); err = add_disk(dev->gd); if (err) - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); return err; } @@ -1630,7 +1630,7 @@ put_sess: static void destroy_gen_disk(struct rnbd_clt_dev *dev) { del_gendisk(dev->gd); - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); } static void destroy_sysfs(struct rnbd_clt_dev *dev, diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index dd0a1a6fed29..fb855da971ee 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -886,7 +886,7 @@ static int probe_disk(struct vdc_port *port) return 0; out_cleanup_disk: - blk_cleanup_disk(g); + put_disk(g); out_free_tag: blk_mq_free_tag_set(&port->tag_set); return err; @@ -1070,7 +1070,7 @@ static void vdc_port_remove(struct vio_dev *vdev) del_timer_sync(&port->vio.timer); del_gendisk(port->disk); - blk_cleanup_disk(port->disk); + put_disk(port->disk); blk_mq_free_tag_set(&port->tag_set); vdc_free_tx_ring(port); diff --git a/drivers/block/swim.c b/drivers/block/swim.c index fef65a18d56f..42b4b6828690 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -783,7 +783,7 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs) if (fs->registered) del_gendisk(fs->disk); - blk_cleanup_disk(disk); + put_disk(disk); blk_mq_free_tag_set(&fs->tag_set); } diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 6c39f2c9f806..da811a7da03f 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1238,7 +1238,7 @@ static int swim3_attach(struct macio_dev *mdev, return 0; out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out_free_tag_set: 
blk_mq_free_tag_set(&fs->tag_set); out_unregister: diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 75057dbbcfbe..0e1a484cab0b 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -1377,7 +1377,7 @@ static void carm_free_disk(struct carm_host *host, unsigned int port_no) if (host->state > HST_DEV_ACTIVATE) del_gendisk(disk); - blk_cleanup_disk(disk); + put_disk(disk); } static int carm_init_shm(struct carm_host *host) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index cff1b6f6b054..d7d72e8f6e55 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1089,7 +1089,7 @@ static int virtblk_probe(struct virtio_device *vdev) return 0; out_cleanup_disk: - blk_cleanup_disk(vblk->disk); + put_disk(vblk->disk); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_free_vq: diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 33f04ef78984..3bc80f35418b 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2384,7 +2384,7 @@ static void blkfront_connect(struct blkfront_info *info) err = device_add_disk(&info->xbdev->dev, info->gd, NULL); if (err) { - blk_cleanup_disk(info->gd); + put_disk(info->gd); blk_mq_free_tag_set(&info->tag_set); info->rq = NULL; goto fail; @@ -2469,7 +2469,7 @@ static int blkfront_remove(struct xenbus_device *xbdev) blkif_free(info, 0); if (info->gd) { xlbd_release_minors(info->gd->first_minor, info->gd->minors); - blk_cleanup_disk(info->gd); + put_disk(info->gd); blk_mq_free_tag_set(&info->tag_set); } diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 18ad43d9933e..c1e85f356e4d 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c @@ -337,7 +337,7 @@ static int z2ram_register_disk(int minor) z2ram_gendisk[minor] = disk; err = add_disk(disk); if (err) - blk_cleanup_disk(disk); + put_disk(disk); return err; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index b8549c61ff2c..e5233c911e43 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1957,7 +1957,7 @@ static int zram_add(void) return device_id; out_cleanup_disk: - blk_cleanup_disk(zram->disk); + put_disk(zram->disk); out_free_idr: idr_remove(&zram_index_idr, device_id); out_free_dev: @@ -2008,7 +2008,7 @@ static int zram_remove(struct zram *zram) */ zram_reset_device(zram); - blk_cleanup_disk(zram->disk); + put_disk(zram->disk); kfree(zram); return 0; } diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index f4cc90ea6198..ceded5772aac 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -817,7 +817,7 @@ probe_fail_free_irqs: free_irq(HW_EVENT_GDROM_DMA, &gd); free_irq(HW_EVENT_GDROM_CMD, &gd); probe_fail_cleanup_disk: - blk_cleanup_disk(gd.disk); + put_disk(gd.disk); probe_fail_free_tag_set: blk_mq_free_tag_set(&gd.tag_set); probe_fail_free_cd_info: diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 3563d15dbaf2..9dd752d272f6 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -884,7 +884,7 @@ static void bcache_device_free(struct bcache_device *d) if (disk) { ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); - blk_cleanup_disk(disk); + put_disk(disk); } bioset_exit(&d->bio_split); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4c04a980fcd9..8872f9c63688 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1894,7 +1894,7 @@ static void cleanup_mapped_device(struct mapped_device *md) del_gendisk(md->disk); } 
dm_queue_destroy_crypto_profile(md->queue); - blk_cleanup_disk(md->disk); + put_disk(md->disk); } if (md->pending_io) { diff --git a/drivers/md/md.c b/drivers/md/md.c index c7ecb0bffda0..076255ec9ba1 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5579,7 +5579,7 @@ static void md_free(struct kobject *ko) if (mddev->gendisk) { del_gendisk(mddev->gendisk); - blk_cleanup_disk(mddev->gendisk); + put_disk(mddev->gendisk); } percpu_ref_exit(&mddev->writes_pending); @@ -5718,7 +5718,7 @@ static int md_alloc(dev_t dev, char *name) out_del_gendisk: del_gendisk(disk); out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out_unlock_disks_mutex: mutex_unlock(&disks_mutex); mddev_put(mddev); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index ba7e7249a3db..ed9a683b3ca8 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -2129,7 +2129,7 @@ static int msb_init_disk(struct memstick_dev *card) return 0; out_cleanup_disk: - blk_cleanup_disk(msb->disk); + put_disk(msb->disk); out_free_tag_set: blk_mq_free_tag_set(&msb->tag_set); out_release_id: diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index 72e91c06c618..61cf75d4a01e 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -1209,7 +1209,7 @@ static int mspro_block_init_disk(struct memstick_dev *card) return 0; out_cleanup_disk: - blk_cleanup_disk(msb->disk); + put_disk(msb->disk); out_free_tag_set: blk_mq_free_tag_set(&msb->tag_set); out_release_id: diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index f73172111465..60b222799871 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -29,7 +29,7 @@ static void blktrans_dev_release(struct kref *kref) struct mtd_blktrans_dev *dev = container_of(kref, struct mtd_blktrans_dev, ref); - blk_cleanup_disk(dev->disk); + put_disk(dev->disk); blk_mq_free_tag_set(dev->tag_set); kfree(dev->tag_set); list_del(&dev->list); @@ -398,7 +398,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) return 0; out_cleanup_disk: - blk_cleanup_disk(new->disk); + put_disk(new->disk); out_free_tag_set: blk_mq_free_tag_set(new->tag_set); out_kfree_tag_set: diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index a78fdf3b30f7..4cf67a2a0d04 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c @@ -467,7 +467,7 @@ out_destroy_wq: out_remove_minor: idr_remove(&ubiblock_minor_idr, gd->first_minor); out_cleanup_disk: - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); out_free_tags: blk_mq_free_tag_set(&dev->tag_set); out_free_dev: @@ -486,7 +486,7 @@ static void ubiblock_cleanup(struct ubiblock *dev) destroy_workqueue(dev->wq); /* Finally destroy the blk queue */ dev_info(disk_to_dev(dev->gd), "released"); - blk_cleanup_disk(dev->gd); + put_disk(dev->gd); blk_mq_free_tag_set(&dev->tag_set); idr_remove(&ubiblock_minor_idr, dev->gd->first_minor); } diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 9613e54c7a67..5e622c0d4b66 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1548,14 +1548,14 @@ static int btt_blk_init(struct btt *btt) return 0; out_cleanup_disk: - blk_cleanup_disk(btt->btt_disk); + put_disk(btt->btt_disk); return rc; } static void btt_blk_cleanup(struct btt *btt) { del_gendisk(btt->btt_disk); - blk_cleanup_disk(btt->btt_disk); + put_disk(btt->btt_disk); } /** diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 629d10fcf53b..a72b81fa3242 100644 --- 
a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -450,7 +450,7 @@ static void pmem_release_disk(void *__pmem) put_dax(pmem->dax_dev); del_gendisk(pmem->disk); - blk_cleanup_disk(pmem->disk); + put_disk(pmem->disk); } static int pmem_attach_disk(struct device *dev, @@ -596,7 +596,7 @@ out_cleanup_dax: kill_dax(pmem->dax_dev); put_dax(pmem->dax_dev); out: - blk_cleanup_disk(pmem->disk); + put_disk(pmem->disk); return rc; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4e3a0f7bfc9c..b5b24998a5ab 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4061,7 +4061,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); out_cleanup_disk: - blk_cleanup_disk(disk); + put_disk(disk); out_free_ns: kfree(ns); out_free_id: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d3e2440d8abb..ccf9a6da8f6e 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -853,7 +853,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); - blk_cleanup_disk(head->disk); + put_disk(head->disk); } void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 8d0d0eaa3059..4d8d1759775a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -414,7 +414,7 @@ removeseg: kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); up_write(&dcssblk_devices_sem); if (device_remove_file_self(dev, attr)) { @@ -712,7 +712,7 @@ out_dax: put_dax(dev_info->dax_dev); put_dev: list_del(&dev_info->lh); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); list_for_each_entry(seg_info, &dev_info->seg_list, lh) { segment_unload(seg_info->segment_name); } @@ -722,7 +722,7 @@ put_dev: dev_list_del: list_del(&dev_info->lh); release_gd: - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); up_write(&dcssblk_devices_sem); seg_list_del: if (dev_info == NULL) @@ -790,7 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch kill_dax(dev_info->dax_dev); put_dax(dev_info->dax_dev); del_gendisk(dev_info->gd); - blk_cleanup_disk(dev_info->gd); + put_disk(dev_info->gd); /* unload all related segments */ list_for_each_entry(entry, &dev_info->seg_list, lh) diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c index 2a9c0ddcade5..0c1df1d5f1ac 100644 --- a/drivers/s390/block/scm_blk.c +++ b/drivers/s390/block/scm_blk.c @@ -501,7 +501,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev) return 0; out_cleanup_disk: - blk_cleanup_disk(bdev->gendisk); + put_disk(bdev->gendisk); out_tag: blk_mq_free_tag_set(&bdev->tag_set); out: @@ -512,7 +512,7 @@ out: void scm_blk_dev_cleanup(struct scm_blk_dev *bdev) { del_gendisk(bdev->gendisk); - blk_cleanup_disk(bdev->gendisk); + put_disk(bdev->gendisk); blk_mq_free_tag_set(&bdev->tag_set); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 530eeccffda3..22b12531aeb7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -834,7 +834,6 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); \ __blk_alloc_disk(node_id, &__key); \ }) -void blk_cleanup_disk(struct gendisk *disk); int 
__register_blkdev(unsigned int major, const char *name, void (*probe)(dev_t devt)); From cc5c516df028b221d94c65c47c5ae8d20f61b6f9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:45 +0200 Subject: [PATCH 042/178] block: simplify blktrace sysfs attribute creation Add the trace attributes to the default gendisk attributes, just like we already do for partitions. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 11 +---------- block/blk.h | 2 ++ block/genhd.c | 3 +++ block/partitions/core.c | 1 - include/linux/blktrace_api.h | 10 ---------- kernel/trace/blktrace.c | 11 ----------- 6 files changed, 6 insertions(+), 32 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9b211e519de8..5f3f73115988 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -810,21 +810,14 @@ int blk_register_queue(struct gendisk *disk) struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; - ret = blk_trace_init_sysfs(dev); - if (ret) - return ret; - mutex_lock(&q->sysfs_dir_lock); ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); - if (ret < 0) { - blk_trace_remove_sysfs(dev); + if (ret < 0) goto unlock; - } ret = sysfs_create_group(&q->kobj, &queue_attr_group); if (ret) { - blk_trace_remove_sysfs(dev); kobject_del(&q->kobj); kobject_put(&dev->kobj); goto unlock; @@ -890,7 +883,6 @@ put_dev: mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); - blk_trace_remove_sysfs(dev); kobject_put(&dev->kobj); return ret; @@ -931,7 +923,6 @@ void blk_unregister_queue(struct gendisk *disk) if (queue_is_mq(q)) blk_mq_unregister_dev(disk_to_dev(disk), q); blk_crypto_sysfs_unregister(q); - blk_trace_remove_sysfs(disk_to_dev(disk)); mutex_lock(&q->sysfs_lock); elv_unregister_queue(q); diff --git a/block/blk.h b/block/blk.h index 1a0d3e6a4a63..74d59435870c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -452,6 +452,8 @@ extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; +extern struct attribute_group blk_trace_attr_group; + long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); diff --git a/block/genhd.c b/block/genhd.c index bf9be06af2c8..b1fb7e058b9c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1134,6 +1134,9 @@ static struct attribute_group disk_attr_group = { static const struct attribute_group *disk_attr_groups[] = { &disk_attr_group, +#ifdef CONFIG_BLK_DEV_IO_TRACE + &blk_trace_attr_group, +#endif NULL }; diff --git a/block/partitions/core.c b/block/partitions/core.c index 8a0ec929023b..7dc487f5b03c 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "check.h" diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 623e22492afa..f6f9b544365a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -77,10 +77,6 @@ extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); extern int blk_trace_remove(struct request_queue *q); -extern void blk_trace_remove_sysfs(struct device *dev); -extern int blk_trace_init_sysfs(struct device *dev); - 
-extern struct attribute_group blk_trace_attr_group; #else /* !CONFIG_BLK_DEV_IO_TRACE */ # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) @@ -91,13 +87,7 @@ # define blk_trace_remove(q) (-ENOTTY) # define blk_add_trace_msg(q, fmt, ...) do { } while (0) # define blk_add_cgroup_trace_msg(q, cg, fmt, ...) do { } while (0) -# define blk_trace_remove_sysfs(dev) do { } while (0) # define blk_trace_note_message_enabled(q) (false) -static inline int blk_trace_init_sysfs(struct device *dev) -{ - return 0; -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_COMPAT diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fe04c6f96ca5..c584effe5fe9 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1867,17 +1867,6 @@ out_unlock_bdev: out: return ret ? ret : count; } - -int blk_trace_init_sysfs(struct device *dev) -{ - return sysfs_create_group(&dev->kobj, &blk_trace_attr_group); -} - -void blk_trace_remove_sysfs(struct device *dev) -{ - sysfs_remove_group(&dev->kobj, &blk_trace_attr_group); -} - #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING From 060f131e9c438837f9792e456fae424e621fb881 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:46 +0200 Subject: [PATCH 043/178] block: remove a superfluous queue kobject reference kobject_add already adds a reference to the parent that is dropped on deletion, so don't bother grabbing another one. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5f3f73115988..f9373da591b8 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -812,14 +812,13 @@ int blk_register_queue(struct gendisk *disk) mutex_lock(&q->sysfs_dir_lock); - ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); + ret = kobject_add(&q->kobj, &dev->kobj, "%s", "queue"); if (ret < 0) goto unlock; ret = sysfs_create_group(&q->kobj, &queue_attr_group); if (ret) { kobject_del(&q->kobj); - kobject_put(&dev->kobj); goto unlock; } @@ -883,7 +882,6 @@ put_dev: mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); - kobject_put(&dev->kobj); return ret; } @@ -941,6 +939,4 @@ void blk_unregister_queue(struct gendisk *disk) q->sched_debugfs_dir = NULL; q->rqos_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex); - - kobject_put(&disk_to_dev(disk)->kobj); } From 4a8d14bba486cca6880062f1ef240cf1d45f3367 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:47 +0200 Subject: [PATCH 044/178] block: use default groups to register the queue attributes Set up the default_groups for blk_queue_ktype instead of manually calling sysfs_create_group.
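As a minimal sketch of the mechanism this relies on (names taken from the diff below, everything else elided): attribute groups listed in a kobj_type's NULL-terminated default_groups array are created by the kobject core as part of kobject_add() and removed again by kobject_del(), so the explicit sysfs_create_group() call and its partial-failure unwinding in blk_register_queue() become unnecessary.

	/* Sketch only -- see the diff below for the real change. */
	static const struct attribute_group *blk_queue_attr_groups[] = {
		&queue_attr_group,
		NULL,			/* array must be NULL-terminated */
	};

	struct kobj_type blk_queue_ktype = {
		.default_groups	= blk_queue_attr_groups,
		.sysfs_ops	= &queue_sysfs_ops,
		.release	= blk_release_queue,
	};

A single kobject_add(&q->kobj, ...) now creates the "queue" directory together with all files in the group, and kobject_del() tears both down.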
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-4-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f9373da591b8..b72506770b97 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -795,7 +795,13 @@ static const struct sysfs_ops queue_sysfs_ops = { .store = queue_attr_store, }; +static const struct attribute_group *blk_queue_attr_groups[] = { + &queue_attr_group, + NULL +}; + struct kobj_type blk_queue_ktype = { + .default_groups = blk_queue_attr_groups, .sysfs_ops = &queue_sysfs_ops, .release = blk_release_queue, }; @@ -816,12 +822,6 @@ int blk_register_queue(struct gendisk *disk) if (ret < 0) goto unlock; - ret = sysfs_create_group(&q->kobj, &queue_attr_group); - if (ret) { - kobject_del(&q->kobj); - goto unlock; - } - if (queue_is_mq(q)) __blk_mq_register_dev(dev, q); mutex_lock(&q->sysfs_lock); From 81f0c2ef41b02185928563899cd4d618ffc7eebf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:48 +0200 Subject: [PATCH 045/178] block: remove the extra gendisk reference in __blk_mq_register_dev kobject_add already grabs a reference to the parent, no need to have another one. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index c08426975856..f4caaa668e3c 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -215,7 +215,6 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - kobject_put(&dev->kobj); q->mq_sysfs_init_done = false; } @@ -261,7 +260,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q) WARN_ON_ONCE(!q->kobj.parent); lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); + ret = kobject_add(q->mq_kobj, &dev->kobj, "%s", "mq"); if (ret < 0) goto out; @@ -286,7 +285,6 @@ unreg: kobject_uevent(q->mq_kobj, KOBJ_REMOVE); kobject_del(q->mq_kobj); - kobject_put(&dev->kobj); return ret; } From eaa870f97544668025ba1f96ee267abac7b3c73c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:49 +0200 Subject: [PATCH 046/178] blk-mq: rename blk_mq_sysfs_{,un}register Add a _hctx postfix to better describe what the functions do, match the debugfs equivalents and release the old names for functions that should be using them. 
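For quick reference, the renamed helpers keep their request_queue-based signatures (sketch matching the diff below):

	int blk_mq_sysfs_register_hctxs(struct request_queue *q);	/* was blk_mq_sysfs_register() */
	void blk_mq_sysfs_unregister_hctxs(struct request_queue *q);	/* was blk_mq_sysfs_unregister() */

This frees the short blk_mq_sysfs_{,un}register() names for the gendisk-based helpers introduced in the next patch.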
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 4 ++-- block/blk-mq.c | 4 ++-- block/blk-mq.h | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index f4caaa668e3c..ee6efe2b250d 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -288,7 +288,7 @@ unreg: return ret; } -void blk_mq_sysfs_unregister(struct request_queue *q) +void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; @@ -304,7 +304,7 @@ unlock: mutex_unlock(&q->sysfs_dir_lock); } -int blk_mq_sysfs_register(struct request_queue *q) +int blk_mq_sysfs_register_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; unsigned long i; diff --git a/block/blk-mq.c b/block/blk-mq.c index b1dbc4b2c2c9..15c7c5c4ad22 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -4559,7 +4559,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_debugfs_unregister_hctxs(q); - blk_mq_sysfs_unregister(q); + blk_mq_sysfs_unregister_hctxs(q); } prev_nr_hw_queues = set->nr_hw_queues; @@ -4590,7 +4590,7 @@ fallback: reregister: list_for_each_entry(q, &set->tag_list, tag_set_list) { - blk_mq_sysfs_register(q); + blk_mq_sysfs_register_hctxs(q); blk_mq_debugfs_register_hctxs(q); } diff --git a/block/blk-mq.h b/block/blk-mq.h index e4c6fe2c8ac8..a92639f2bfd2 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -119,8 +119,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, extern void blk_mq_sysfs_init(struct request_queue *q); extern void blk_mq_sysfs_deinit(struct request_queue *q); extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q); -extern int blk_mq_sysfs_register(struct request_queue *q); -extern void blk_mq_sysfs_unregister(struct request_queue *q); +int blk_mq_sysfs_register_hctxs(struct request_queue *q); +void blk_mq_sysfs_unregister_hctxs(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); void blk_mq_free_plug_rqs(struct blk_plug *plug); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); From 8682b92e5ab852b93739a0f2b261fff4c733be57 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 28 Jun 2022 19:18:50 +0200 Subject: [PATCH 047/178] blk-mq: cleanup disk sysfs registration Pass a gendisk to the sysfs register/unregister functions and give them descriptive names. Also move the unregistration helper next to the one doing the registration. 
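The resulting interface is simply (sketch; the diff below has the full implementation):

	int blk_mq_sysfs_register(struct gendisk *disk);
	void blk_mq_sysfs_unregister(struct gendisk *disk);

Because the parent kobject is derived internally via disk_to_dev(disk), callers no longer need to pass a matching (device, request_queue) pair, which removes one way to get the registration wrong.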
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20220628171850.1313069-7-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-sysfs.c | 39 ++++++++++++++++++++------------------- block/blk-mq.h | 3 ++- block/blk-sysfs.c | 9 ++++----- include/linux/blk-mq.h | 1 - 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index ee6efe2b250d..93997d297d42 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -203,22 +203,6 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) return ret; } -void blk_mq_unregister_dev(struct device *dev, struct request_queue *q) -{ - struct blk_mq_hw_ctx *hctx; - unsigned long i; - - lockdep_assert_held(&q->sysfs_dir_lock); - - queue_for_each_hw_ctx(q, hctx, i) - blk_mq_unregister_hctx(hctx); - - kobject_uevent(q->mq_kobj, KOBJ_REMOVE); - kobject_del(q->mq_kobj); - - q->mq_sysfs_init_done = false; -} - void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx) { kobject_init(&hctx->kobj, &blk_mq_hw_ktype); @@ -251,16 +235,16 @@ void blk_mq_sysfs_init(struct request_queue *q) } } -int __blk_mq_register_dev(struct device *dev, struct request_queue *q) +int blk_mq_sysfs_register(struct gendisk *disk) { + struct request_queue *q = disk->queue; struct blk_mq_hw_ctx *hctx; unsigned long i, j; int ret; - WARN_ON_ONCE(!q->kobj.parent); lockdep_assert_held(&q->sysfs_dir_lock); - ret = kobject_add(q->mq_kobj, &dev->kobj, "%s", "mq"); + ret = kobject_add(q->mq_kobj, &disk_to_dev(disk)->kobj, "mq"); if (ret < 0) goto out; @@ -288,6 +272,23 @@ unreg: return ret; } +void blk_mq_sysfs_unregister(struct gendisk *disk) +{ + struct request_queue *q = disk->queue; + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + lockdep_assert_held(&q->sysfs_dir_lock); + + queue_for_each_hw_ctx(q, hctx, i) + blk_mq_unregister_hctx(hctx); + + kobject_uevent(q->mq_kobj, KOBJ_REMOVE); + kobject_del(q->mq_kobj); + + q->mq_sysfs_init_done = false; +} + void blk_mq_sysfs_unregister_hctxs(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; diff --git a/block/blk-mq.h b/block/blk-mq.h index a92639f2bfd2..54e20edf0da3 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -118,7 +118,8 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, */ extern void blk_mq_sysfs_init(struct request_queue *q); extern void blk_mq_sysfs_deinit(struct request_queue *q); -extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q); +int blk_mq_sysfs_register(struct gendisk *disk); +void blk_mq_sysfs_unregister(struct gendisk *disk); int blk_mq_sysfs_register_hctxs(struct request_queue *q); void blk_mq_sysfs_unregister_hctxs(struct request_queue *q); extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index b72506770b97..85ea43eff094 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -812,18 +812,17 @@ struct kobj_type blk_queue_ktype = { */ int blk_register_queue(struct gendisk *disk) { - int ret; - struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; + int ret; mutex_lock(&q->sysfs_dir_lock); - ret = kobject_add(&q->kobj, &dev->kobj, "%s", "queue"); + ret = kobject_add(&q->kobj, &disk_to_dev(disk)->kobj, "queue"); if (ret < 0) goto unlock; if (queue_is_mq(q)) - __blk_mq_register_dev(dev, q); + blk_mq_sysfs_register(disk); mutex_lock(&q->sysfs_lock); mutex_lock(&q->debugfs_mutex); @@ -919,7 +918,7 @@ void blk_unregister_queue(struct gendisk *disk) * structures that can be 
modified through sysfs. */ if (queue_is_mq(q)) - blk_mq_unregister_dev(disk_to_dev(disk), q); + blk_mq_sysfs_unregister(disk); blk_crypto_sysfs_unregister(q); mutex_lock(&q->sysfs_lock); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0fd96e92c6c6..43aad0da3305 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -691,7 +691,6 @@ struct gendisk *blk_mq_alloc_disk_for_queue(struct request_queue *q, struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -void blk_mq_unregister_dev(struct device *, struct request_queue *); void blk_mq_destroy_queue(struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); From b9a1c179bdfa133d28ab8b7d30631b0accdc2057 Mon Sep 17 00:00:00 2001 From: Ying Sun Date: Wed, 29 Jun 2022 14:24:09 +0800 Subject: [PATCH 048/178] block: remove "select BLK_RQ_IO_DATA_LEN" from BLK_CGROUP_IOCOST dependency The configuration item BLK_RQ_IO_DATA_LEN is not declared anywhere in the kernel, so "select BLK_RQ_IO_DATA_LEN" is meaningless and can be removed. Signed-off-by: Ying Sun Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220629062409.19458-1-sunying@nj.iscas.ac.cn Signed-off-by: Jens Axboe --- block/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/block/Kconfig b/block/Kconfig index 50b17e260fa2..444c5ab3b67e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -147,7 +147,6 @@ config BLK_CGROUP_FC_APPID config BLK_CGROUP_IOCOST bool "Enable support for cost model based cgroup IO controller" depends on BLK_CGROUP - select BLK_RQ_IO_DATA_LEN select BLK_RQ_ALLOC_TIME help Enabling this option enables the .weight interface for cost From 6a27d28c81bc5843de2490688a04ee5baa6615e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Jun 2022 08:20:12 +0200 Subject: [PATCH 049/178] block: move ->ia_ranges from the request_queue to the gendisk Independent access ranges only matter for file system I/O and are only valid with a registered gendisk, so move them there. Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20220629062013.1331068-2-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-ia-ranges.c | 18 +++++++++--------- include/linux/blkdev.h | 12 ++++++------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index 47c89e65b57f..c1bf14bcd15f 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -106,7 +106,7 @@ static struct kobj_type blk_ia_ranges_ktype = { * * Register with sysfs a set of independent access ranges for @disk. * If @new_iars is not NULL, this set of ranges is registered and the old set - * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is + * specified by disk->ia_ranges is unregistered. Otherwise, disk->ia_ranges is * registered if it is not already.
*/ int disk_register_independent_access_ranges(struct gendisk *disk, @@ -121,12 +121,12 @@ int disk_register_independent_access_ranges(struct gendisk *disk, /* If a new range set is specified, unregister the old one */ if (new_iars) { - if (q->ia_ranges) + if (disk->ia_ranges) disk_unregister_independent_access_ranges(disk); - q->ia_ranges = new_iars; + disk->ia_ranges = new_iars; } - iars = q->ia_ranges; + iars = disk->ia_ranges; if (!iars) return 0; @@ -138,7 +138,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype, &q->kobj, "%s", "independent_access_ranges"); if (ret) { - q->ia_ranges = NULL; + disk->ia_ranges = NULL; kobject_put(&iars->kobj); return ret; } @@ -164,7 +164,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, void disk_unregister_independent_access_ranges(struct gendisk *disk) { struct request_queue *q = disk->queue; - struct blk_independent_access_ranges *iars = q->ia_ranges; + struct blk_independent_access_ranges *iars = disk->ia_ranges; int i; lockdep_assert_held(&q->sysfs_dir_lock); @@ -182,7 +182,7 @@ void disk_unregister_independent_access_ranges(struct gendisk *disk) kfree(iars); } - q->ia_ranges = NULL; + disk->ia_ranges = NULL; } static struct blk_independent_access_range * @@ -242,7 +242,7 @@ static bool disk_check_ia_ranges(struct gendisk *disk, static bool disk_ia_ranges_changed(struct gendisk *disk, struct blk_independent_access_ranges *new) { - struct blk_independent_access_ranges *old = disk->queue->ia_ranges; + struct blk_independent_access_ranges *old = disk->ia_ranges; int i; if (!old) @@ -331,7 +331,7 @@ reg: if (blk_queue_registered(q)) { disk_register_independent_access_ranges(disk, iars); } else { - swap(q->ia_ranges, iars); + swap(disk->ia_ranges, iars); kfree(iars); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22b12531aeb7..b9a94c53c6cd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -171,6 +171,12 @@ struct gendisk { struct badblocks *bb; struct lockdep_map lockdep_map; u64 diskseq; + + /* + * Independent sector access ranges. This is always NULL for + * devices that do not have multiple independent access ranges. + */ + struct blk_independent_access_ranges *ia_ranges; }; static inline bool disk_live(struct gendisk *disk) @@ -539,12 +545,6 @@ struct request_queue { bool mq_sysfs_init_done; - /* - * Independent sector access ranges. This is always NULL for - * devices that do not have multiple independent access ranges. - */ - struct blk_independent_access_ranges *ia_ranges; - /** * @srcu: Sleepable RCU. Use as lock when type of the request queue * is blocking (BLK_MQ_F_BLOCKING). Must be the last member From 22d0c4080fe49299640d9d6c43154c49794c2825 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 29 Jun 2022 08:20:13 +0200 Subject: [PATCH 050/178] block: simplify disk_set_independent_access_ranges Lift setting disk->ia_ranges from disk_register_independent_access_ranges into disk_set_independent_access_ranges, and make the behavior the same for the registered vs non-registered queue cases. 
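Condensed to its control flow, the simplified function reads roughly as follows (a sketch only -- the locking with sysfs_dir_lock/sysfs_lock and the unlock labels are elided; see the diff below for the real code):

	void disk_set_independent_access_ranges(struct gendisk *disk,
			struct blk_independent_access_ranges *iars)
	{
		/* throw away an invalid or unchanged range set up front */
		if (iars && !disk_check_ia_ranges(disk, iars)) {
			kfree(iars);
			iars = NULL;
		}
		if (iars && !disk_ia_ranges_changed(disk, iars)) {
			kfree(iars);
			return;
		}

		/* identical path for registered and unregistered queues */
		disk_unregister_independent_access_ranges(disk);
		disk->ia_ranges = iars;
		if (blk_queue_registered(disk->queue))
			disk_register_independent_access_ranges(disk);
	}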
Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Link: https://lore.kernel.org/r/20220629062013.1331068-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-ia-ranges.c | 57 ++++++++++++------------------------------- block/blk-sysfs.c | 2 +- block/blk.h | 3 +-- 3 files changed, 18 insertions(+), 44 deletions(-) diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index c1bf14bcd15f..2bd1d311033b 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -102,31 +102,18 @@ static struct kobj_type blk_ia_ranges_ktype = { * disk_register_independent_access_ranges - register with sysfs a set of * independent access ranges * @disk: Target disk - * @new_iars: New set of independent access ranges * * Register with sysfs a set of independent access ranges for @disk. - * If @new_iars is not NULL, this set of ranges is registered and the old set - * specified by disk->ia_ranges is unregistered. Otherwise, disk->ia_ranges is - * registered if it is not already. */ -int disk_register_independent_access_ranges(struct gendisk *disk, - struct blk_independent_access_ranges *new_iars) +int disk_register_independent_access_ranges(struct gendisk *disk) { + struct blk_independent_access_ranges *iars = disk->ia_ranges; struct request_queue *q = disk->queue; - struct blk_independent_access_ranges *iars; int i, ret; lockdep_assert_held(&q->sysfs_dir_lock); lockdep_assert_held(&q->sysfs_lock); - /* If a new range set is specified, unregister the old one */ - if (new_iars) { - if (disk->ia_ranges) - disk_unregister_independent_access_ranges(disk); - disk->ia_ranges = new_iars; - } - - iars = disk->ia_ranges; if (!iars) return 0; @@ -210,6 +197,9 @@ static bool disk_check_ia_ranges(struct gendisk *disk, sector_t sector = 0; int i; + if (WARN_ON_ONCE(!iars->nr_ia_ranges)) + return false; + /* * While sorting the ranges in increasing LBA order, check that the * ranges do not overlap, that there are no sector holes and that all @@ -298,25 +288,15 @@ void disk_set_independent_access_ranges(struct gendisk *disk, { struct request_queue *q = disk->queue; - if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) { + mutex_lock(&q->sysfs_dir_lock); + mutex_lock(&q->sysfs_lock); + if (iars && !disk_check_ia_ranges(disk, iars)) { kfree(iars); iars = NULL; } - - mutex_lock(&q->sysfs_dir_lock); - mutex_lock(&q->sysfs_lock); - - if (iars) { - if (!disk_check_ia_ranges(disk, iars)) { - kfree(iars); - iars = NULL; - goto reg; - } - - if (!disk_ia_ranges_changed(disk, iars)) { - kfree(iars); - goto unlock; - } + if (iars && !disk_ia_ranges_changed(disk, iars)) { + kfree(iars); + goto unlock; } /* @@ -324,17 +304,12 @@ void disk_set_independent_access_ranges(struct gendisk *disk, * revalidation. If that is the case, we need to unregister the old * set of independent access ranges and register the new set. If the * queue is not registered, registration of the device request queue - * will register the independent access ranges, so only swap in the - * new set and free the old one. + * will register the independent access ranges. 
*/ -reg: - if (blk_queue_registered(q)) { - disk_register_independent_access_ranges(disk, iars); - } else { - swap(disk->ia_ranges, iars); - kfree(iars); - } - + disk_unregister_independent_access_ranges(disk); + disk->ia_ranges = iars; + if (blk_queue_registered(q)) + disk_register_independent_access_ranges(disk); unlock: mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 85ea43eff094..58cb9cb9f48c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -832,7 +832,7 @@ int blk_register_queue(struct gendisk *disk) blk_mq_debugfs_register(q); mutex_unlock(&q->debugfs_mutex); - ret = disk_register_independent_access_ranges(disk, NULL); + ret = disk_register_independent_access_ranges(disk); if (ret) goto put_dev; diff --git a/block/blk.h b/block/blk.h index 74d59435870c..58ad50cacd2d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -459,8 +459,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; -int disk_register_independent_access_ranges(struct gendisk *disk, - struct blk_independent_access_ranges *new_iars); +int disk_register_independent_access_ranges(struct gendisk *disk); void disk_unregister_independent_access_ranges(struct gendisk *disk); #ifdef CONFIG_FAIL_MAKE_REQUEST From 362b8c16f8fc73fddfe4bded25055fa0c9e2bf1e Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 29 Jun 2022 15:09:16 +0800 Subject: [PATCH 051/178] blk-cgroup: factor out blkcg_iostat_update() To reduce some duplicated code, factor out blkcg_iostat_update(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220629070917.3113016-2-yanaijie@huawei.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 6906981563f8..34a9452f93a7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -846,6 +846,21 @@ static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src) } } +static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur, + struct blkg_iostat *last) +{ + struct blkg_iostat delta; + unsigned long flags; + + /* propagate percpu delta to global */ + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); + blkg_iostat_set(&delta, cur); + blkg_iostat_sub(&delta, last); + blkg_iostat_add(&blkg->iostat.cur, &delta); + blkg_iostat_add(last, &delta); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); +} + static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) { struct blkcg *blkcg = css_to_blkcg(css); @@ -860,8 +875,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { struct blkcg_gq *parent = blkg->parent; struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); - struct blkg_iostat cur, delta; - unsigned long flags; + struct blkg_iostat cur; unsigned int seq; /* fetch the current per-cpu values */ @@ -870,23 +884,12 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) blkg_iostat_set(&cur, &bisc->cur); } while (u64_stats_fetch_retry(&bisc->sync, seq)); - /* propagate percpu delta to global */ - flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); - blkg_iostat_set(&delta, &cur); - blkg_iostat_sub(&delta, &bisc->last); - blkg_iostat_add(&blkg->iostat.cur, &delta); 
- blkg_iostat_add(&bisc->last, &delta); - u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); + blkcg_iostat_update(blkg, &cur, &bisc->last); /* propagate global delta to parent (unless that's root) */ - if (parent && parent->parent) { - flags = u64_stats_update_begin_irqsave(&parent->iostat.sync); - blkg_iostat_set(&delta, &blkg->iostat.cur); - blkg_iostat_sub(&delta, &blkg->iostat.last); - blkg_iostat_add(&parent->iostat.cur, &delta); - blkg_iostat_add(&blkg->iostat.last, &delta); - u64_stats_update_end_irqrestore(&parent->iostat.sync, flags); - } + if (parent && parent->parent) + blkcg_iostat_update(parent, &blkg->iostat.cur, + &blkg->iostat.last); } rcu_read_unlock(); From e55cf798140518b900e5254093f1195f65c23026 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Wed, 29 Jun 2022 15:09:17 +0800 Subject: [PATCH 052/178] blk-cgroup: factor out blkcg_free_all_cpd() To reduce some duplicated code, factor out blkcg_free_all_cpd(). No functional change. Signed-off-by: Jason Yan Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220629070917.3113016-3-yanaijie@huawei.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 34a9452f93a7..27a2d0ca0c70 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1533,6 +1533,18 @@ void blkcg_deactivate_policy(struct request_queue *q, } EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); +static void blkcg_free_all_cpd(struct blkcg_policy *pol) +{ + struct blkcg *blkcg; + + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + if (blkcg->cpd[pol->plid]) { + pol->cpd_free_fn(blkcg->cpd[pol->plid]); + blkcg->cpd[pol->plid] = NULL; + } + } +} + /** * blkcg_policy_register - register a blkcg policy * @pol: blkcg policy to register @@ -1597,14 +1609,9 @@ int blkcg_policy_register(struct blkcg_policy *pol) return 0; err_free_cpds: - if (pol->cpd_free_fn) { - list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { - if (blkcg->cpd[pol->plid]) { - pol->cpd_free_fn(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; - } - } - } + if (pol->cpd_free_fn) + blkcg_free_all_cpd(pol); + blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); @@ -1621,8 +1628,6 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { - struct blkcg *blkcg; - mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) @@ -1637,14 +1642,9 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); - if (pol->cpd_free_fn) { - list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { - if (blkcg->cpd[pol->plid]) { - pol->cpd_free_fn(blkcg->cpd[pol->plid]); - blkcg->cpd[pol->plid] = NULL; - } - } - } + if (pol->cpd_free_fn) + blkcg_free_all_cpd(pol); + blkcg_policy[pol->plid] = NULL; mutex_unlock(&blkcg_pol_mutex); From deef1be18e3fc62ddf04fb3e5e8ff6a301693dcc Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:49 +0800 Subject: [PATCH 053/178] scsi: core: Remove reserved request time-out handling The SCSI core code does not currently support reserved commands. As such, requests which time-out would never be reserved, and scsi_timeout() 'reserved' arg should never be set. 
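For reference, the dead branch in question is the one in the scsi_timeout()
wrapper (simplified from the scsi_lib.c hunk below); it can never be taken
because SCSI allocates no reserved tags:

	static enum blk_eh_timer_return scsi_timeout(struct request *req,
						     bool reserved)
	{
		if (reserved)		/* never true for SCSI */
			return BLK_EH_RESET_TIMER;
		return scsi_times_out(req);
	}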
Remove handling for reserved requests, drop the wrapper scsi_timeout() as it now just calls scsi_times_out() always, and finally rename scsi_times_out() -> scsi_timeout() to match the blk_mq_ops method name. Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-2-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- Documentation/scsi/scsi_eh.rst | 3 +-- Documentation/scsi/scsi_mid_low_api.rst | 2 +- drivers/scsi/scsi_error.c | 7 ++++--- drivers/scsi/scsi_lib.c | 8 -------- drivers/scsi/scsi_priv.h | 3 ++- 5 files changed, 8 insertions(+), 15 deletions(-) diff --git a/Documentation/scsi/scsi_eh.rst b/Documentation/scsi/scsi_eh.rst index 885395dc1f15..bad624fab823 100644 --- a/Documentation/scsi/scsi_eh.rst +++ b/Documentation/scsi/scsi_eh.rst @@ -87,8 +87,7 @@ with the command. 1.2.2 Completing a scmd w/ timeout ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The timeout handler is scsi_times_out(). When a timeout occurs, this -function +The timeout handler is scsi_timeout(). When a timeout occurs, this function 1. invokes optional hostt->eh_timed_out() callback. Return value can be one of diff --git a/Documentation/scsi/scsi_mid_low_api.rst b/Documentation/scsi/scsi_mid_low_api.rst index 63ddea2b9640..a8c5bd15a440 100644 --- a/Documentation/scsi/scsi_mid_low_api.rst +++ b/Documentation/scsi/scsi_mid_low_api.rst @@ -731,7 +731,7 @@ Details:: * Notes: If 'no_async_abort' is defined this callback * will be invoked from scsi_eh thread. No other commands * will then be queued on current host during eh. - * Otherwise it will be called whenever scsi_times_out() + * Otherwise it will be called whenever scsi_timeout() * is called due to a command timeout. * * Optionally defined in: LLD diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 49ef864df581..a8b71b73a5a5 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -139,7 +139,7 @@ static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd) * * Note: this function must be called only for a command that has timed out. * Because the block layer marks a request as complete before it calls - * scsi_times_out(), a .scsi_done() call from the LLD for a command that has + * scsi_timeout(), a .scsi_done() call from the LLD for a command that has * timed out do not have any effect. Hence it is safe to call * scsi_finish_command() from this function. */ @@ -316,8 +316,9 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) } /** - * scsi_times_out - Timeout function for normal scsi commands. + * scsi_timeout - Timeout function for normal scsi commands. * @req: request that is timing out. + * @reserved: whether the request is a reserved request. * * Notes: * We do not need to lock this. There is the potential for a race @@ -325,7 +326,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * normal completion function determines that the timer has already * fired, then it mustn't do anything. 
*/ -enum blk_eh_timer_return scsi_times_out(struct request *req) +enum blk_eh_timer_return scsi_timeout(struct request *req, bool reserved) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); enum blk_eh_timer_return rtn = BLK_EH_DONE; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cdf0056582d5..1b3ca5c16c3d 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1790,14 +1790,6 @@ out_put_budget: return ret; } -static enum blk_eh_timer_return scsi_timeout(struct request *req, - bool reserved) -{ - if (reserved) - return BLK_EH_RESET_TIMER; - return scsi_times_out(req); -} - static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 5c4786310a31..695d0c83ffe0 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -72,7 +72,8 @@ extern void scsi_exit_devinfo(void); /* scsi_error.c */ extern void scmd_eh_abort_handler(struct work_struct *work); -extern enum blk_eh_timer_return scsi_times_out(struct request *req); +extern enum blk_eh_timer_return scsi_timeout(struct request *req, + bool reserved); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); extern void scsi_eh_wakeup(struct Scsi_Host *shost); From 99e48cd6855e9535488e3c90d65edd46c6e6fc1b Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:50 +0800 Subject: [PATCH 054/178] blk-mq: Add a flag for reserved requests Add a flag for reserved requests so that drivers may know this for any special handling. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-3-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++++ include/linux/blk-mq.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index 15c7c5c4ad22..a00e43cc67e5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -475,6 +475,9 @@ retry: if (!(data->rq_flags & RQF_ELV)) blk_mq_tag_busy(data->hctx); + if (data->flags & BLK_MQ_REQ_RESERVED) + data->rq_flags |= RQF_RESV; + /* * Try batched alloc if we want more than 1 tag. */ @@ -589,6 +592,9 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, else data.rq_flags |= RQF_ELV; + if (flags & BLK_MQ_REQ_RESERVED) + data.rq_flags |= RQF_RESV; + ret = -EWOULDBLOCK; tag = blk_mq_get_tag(&data); if (tag == BLK_MQ_NO_TAG) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 43aad0da3305..7c62b7fabec7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,6 +57,7 @@ typedef __u32 __bitwise req_flags_t; #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) /* queue has elevator attached */ #define RQF_ELV ((__force req_flags_t)(1 << 22)) +#define RQF_RESV ((__force req_flags_t)(1 << 23)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -825,6 +826,11 @@ static inline bool blk_mq_need_time_stamp(struct request *rq) return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV)); } +static inline bool blk_mq_is_reserved_rq(struct request *rq) +{ + return rq->rq_flags & RQF_RESV; +} + /* * Batched completions only work when there is no I/O error and no special * ->end_io handler. 
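With RQF_RESV and blk_mq_is_reserved_rq() available, a driver's timeout
handler can make the reserved/normal distinction on its own instead of
taking a 'reserved' argument. A minimal sketch (hypothetical my_timeout()
and my_abort_cmd() driver code, not part of this series; mtip32xx is
converted along these lines in the next patch):

	static enum blk_eh_timer_return my_timeout(struct request *req)
	{
		/* Internal, reserved-tag commands: fail them immediately. */
		if (blk_mq_is_reserved_rq(req)) {
			my_abort_cmd(req);	/* driver completes the request */
			return BLK_EH_DONE;
		}

		/* Normal I/O: give it more time. */
		return BLK_EH_RESET_TIMER;
	}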
From 9bdb4833dd399cbff82cc20893f52bdec66a9eca Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:51 +0800 Subject: [PATCH 055/178] blk-mq: Drop blk_mq_ops.timeout 'reserved' arg With new API blk_mq_is_reserved_rq() we can tell if a request is from the reserved pool, so stop passing 'reserved' arg. There is actually only a single user of that arg for all the callback implementations, which can use blk_mq_is_reserved_rq() instead. This will also allow us to stop passing the same 'reserved' around the blk-mq iter functions next. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Acked-by: Ulf Hansson # For MMC Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-4-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- block/bsg-lib.c | 2 +- drivers/block/mtip32xx/mtip32xx.c | 5 ++--- drivers/block/nbd.c | 3 +-- drivers/block/null_blk/main.c | 2 +- drivers/mmc/core/queue.c | 3 +-- drivers/nvme/host/apple.c | 3 +-- drivers/nvme/host/fc.c | 3 +-- drivers/nvme/host/pci.c | 2 +- drivers/nvme/host/rdma.c | 3 +-- drivers/nvme/host/tcp.c | 3 +-- drivers/s390/block/dasd.c | 2 +- drivers/s390/block/dasd_int.h | 2 +- drivers/scsi/scsi_error.c | 3 +-- drivers/scsi/scsi_priv.h | 3 +-- include/linux/blk-mq.h | 2 +- 16 files changed, 19 insertions(+), 28 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a00e43cc67e5..cedbec36e907 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1427,13 +1427,13 @@ bool blk_mq_queue_inflight(struct request_queue *q) } EXPORT_SYMBOL_GPL(blk_mq_queue_inflight); -static void blk_mq_rq_timed_out(struct request *req, bool reserved) +static void blk_mq_rq_timed_out(struct request *req) { req->rq_flags |= RQF_TIMED_OUT; if (req->q->mq_ops->timeout) { enum blk_eh_timer_return ret; - ret = req->q->mq_ops->timeout(req, reserved); + ret = req->q->mq_ops->timeout(req); if (ret == BLK_EH_DONE) return; WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); @@ -1482,7 +1482,7 @@ static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved) * from blk_mq_check_expired(). 
*/ if (blk_mq_req_expired(rq, next)) - blk_mq_rq_timed_out(rq, reserved); + blk_mq_rq_timed_out(rq); return true; } diff --git a/block/bsg-lib.c b/block/bsg-lib.c index fd4cd5e68282..d6f5dcdce748 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -331,7 +331,7 @@ void bsg_remove_queue(struct request_queue *q) } EXPORT_SYMBOL_GPL(bsg_remove_queue); -static enum blk_eh_timer_return bsg_timeout(struct request *rq, bool reserved) +static enum blk_eh_timer_return bsg_timeout(struct request *rq) { struct bsg_set *bset = container_of(rq->q->tag_set, struct bsg_set, tag_set); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index e116c6cf56f5..5073cb407500 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3357,12 +3357,11 @@ static int mtip_init_cmd(struct blk_mq_tag_set *set, struct request *rq, return 0; } -static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req, - bool reserved) +static enum blk_eh_timer_return mtip_cmd_timeout(struct request *req) { struct driver_data *dd = req->q->queuedata; - if (reserved) { + if (blk_mq_is_reserved_rq(req)) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); cmd->status = BLK_STS_TIMEOUT; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5c4c9c45c6ac..028f23c965df 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -393,8 +393,7 @@ static u32 req_to_nbd_cmd_type(struct request *req) } } -static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, - bool reserved) +static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index d695ea29efa6..4e03a020ee3c 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1578,7 +1578,7 @@ static int null_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) return nr; } -static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res) +static enum blk_eh_timer_return null_timeout_rq(struct request *rq) { struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c index f824cfdab75a..fefaa901b50f 100644 --- a/drivers/mmc/core/queue.c +++ b/drivers/mmc/core/queue.c @@ -116,8 +116,7 @@ static enum blk_eh_timer_return mmc_cqe_timed_out(struct request *req) } } -static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req, - bool reserved) +static enum blk_eh_timer_return mmc_mq_timed_out(struct request *req) { struct request_queue *q = req->q; struct mmc_queue *mq = q->queuedata; diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 2d23b7d41f7e..5c352d5d8ee6 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -862,8 +862,7 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) } } -static enum blk_eh_timer_return apple_nvme_timeout(struct request *req, - bool reserved) +static enum blk_eh_timer_return apple_nvme_timeout(struct request *req) { struct apple_nvme_iod *iod = blk_mq_rq_to_pdu(req); struct apple_nvme_queue *q = iod->q; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a96aa831684c..07fd6db5869c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2565,8 +2565,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) nvme_reset_ctrl(&ctrl->ctrl); } -static enum blk_eh_timer_return 
-nvme_fc_timeout(struct request *rq, bool reserved) +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 247a74aba336..4232192e10dd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1344,7 +1344,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n"); } -static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) +static enum blk_eh_timer_return nvme_timeout(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_queue *nvmeq = iod->nvmeq; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 0fb7c8e7ab0b..a6eaf38b9646 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2013,8 +2013,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq) nvmf_complete_timed_out_request(rq); } -static enum blk_eh_timer_return -nvme_rdma_timeout(struct request *rq, bool reserved) +static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b81942fa5f95..ff502172accd 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2321,8 +2321,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq) nvmf_complete_timed_out_request(rq); } -static enum blk_eh_timer_return -nvme_tcp_timeout(struct request *rq, bool reserved) +static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) { struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index e8489331f12b..4df8bf6505fc 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3145,7 +3145,7 @@ out: * BLK_EH_DONE if the request is handled or terminated * by the driver. */ -enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) +enum blk_eh_timer_return dasd_times_out(struct request *req) { struct dasd_block *block = req->q->queuedata; struct dasd_device *device; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 83b918b84b4a..333a399f754e 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -795,7 +795,7 @@ void dasd_free_device(struct dasd_device *); struct dasd_block *dasd_alloc_block(void); void dasd_free_block(struct dasd_block *); -enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved); +enum blk_eh_timer_return dasd_times_out(struct request *req); void dasd_enable_device(struct dasd_device *); void dasd_set_target_state(struct dasd_device *, int); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index a8b71b73a5a5..266ce414589c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -318,7 +318,6 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) /** * scsi_timeout - Timeout function for normal scsi commands. * @req: request that is timing out. - * @reserved: whether the request is a reserved request. * * Notes: * We do not need to lock this. 
There is the potential for a race @@ -326,7 +325,7 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) * normal completion function determines that the timer has already * fired, then it mustn't do anything. */ -enum blk_eh_timer_return scsi_timeout(struct request *req, bool reserved) +enum blk_eh_timer_return scsi_timeout(struct request *req) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); enum blk_eh_timer_return rtn = BLK_EH_DONE; diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 695d0c83ffe0..6eeaa0a7f86d 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -72,8 +72,7 @@ extern void scsi_exit_devinfo(void); /* scsi_error.c */ extern void scmd_eh_abort_handler(struct work_struct *work); -extern enum blk_eh_timer_return scsi_timeout(struct request *req, - bool reserved); +extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); extern void scsi_eh_wakeup(struct Scsi_Host *shost); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7c62b7fabec7..c84c56d296fe 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -575,7 +575,7 @@ struct blk_mq_ops { /** * @timeout: Called on request timeout. */ - enum blk_eh_timer_return (*timeout)(struct request *, bool); + enum blk_eh_timer_return (*timeout)(struct request *); /** * @poll: Called to poll for completion of a specific tag. From 1263c1929fb8c375494666ec6d1bac838ff02c25 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:52 +0800 Subject: [PATCH 056/178] scsi: fnic: Drop reserved request handling The SCSI core code does not support reserved requests, so drop the handling in fnic_pending_aborts_iter(). Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/1657109034-206040-5-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- drivers/scsi/fnic/fnic_scsi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 3d64877bda8d..e7b7f6d73429 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -2019,8 +2019,6 @@ static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, if (sc == iter_data->lr_sc || sc->device != lun_dev) return true; - if (reserved) - return true; io_lock = fnic_io_lock_tag(fnic, abt_tag); spin_lock_irqsave(io_lock, flags); From 2dd6532e9591f201e7571b30915db807603ab924 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:53 +0800 Subject: [PATCH 057/178] blk-mq: Drop 'reserved' arg of busy_tag_iter_fn We no longer use the 'reserved' arg in busy_tag_iter_fn for any iter function so it may be dropped. Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. 
Petersen Reviewed-by: Sagi Grimberg #nvme Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/1657109034-206040-6-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- block/blk-mq-tag.c | 7 +++---- block/blk-mq.c | 10 ++++------ drivers/block/mtip32xx/mtip32xx.c | 4 ++-- drivers/block/nbd.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 3 +-- drivers/nvme/host/core.c | 2 +- drivers/nvme/host/fc.c | 3 +-- drivers/nvme/host/nvme.h | 2 +- drivers/scsi/aacraid/comminit.c | 2 +- drivers/scsi/aacraid/linit.c | 2 +- drivers/scsi/fnic/fnic_scsi.c | 12 ++++-------- drivers/scsi/hosts.c | 14 ++++++-------- drivers/scsi/mpi3mr/mpi3mr_os.c | 16 ++++------------ include/linux/blk-mq.h | 2 +- include/scsi/scsi_host.h | 2 +- 16 files changed, 33 insertions(+), 52 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b80fae7ab1d9..b11add9a95e2 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -375,7 +375,7 @@ struct show_busy_params { * e.g. due to a concurrent blk_mq_finish_request() call. Returns true to * keep iterating requests. */ -static bool hctx_show_busy_rq(struct request *rq, void *data, bool reserved) +static bool hctx_show_busy_rq(struct request *rq, void *data) { const struct show_busy_params *params = data; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 3cfffef1feb3..4e9b8ec55bda 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -283,7 +283,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) return true; if (rq->q == q && (!hctx || rq->mq_hctx == hctx)) - ret = iter_data->fn(rq, iter_data->data, reserved); + ret = iter_data->fn(rq, iter_data->data); blk_mq_put_rq_ref(rq); return ret; } @@ -354,7 +354,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) if (!(iter_data->flags & BT_TAG_ITER_STARTED) || blk_mq_request_started(rq)) - ret = iter_data->fn(rq, iter_data->data, reserved); + ret = iter_data->fn(rq, iter_data->data); if (!iter_static_rqs) blk_mq_put_rq_ref(rq); return ret; @@ -444,8 +444,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, } EXPORT_SYMBOL(blk_mq_tagset_busy_iter); -static bool blk_mq_tagset_count_completed_rqs(struct request *rq, - void *data, bool reserved) +static bool blk_mq_tagset_count_completed_rqs(struct request *rq, void *data) { unsigned *count = data; diff --git a/block/blk-mq.c b/block/blk-mq.c index cedbec36e907..63385742b8a8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -129,8 +129,7 @@ struct mq_inflight { unsigned int inflight[2]; }; -static bool blk_mq_check_inflight(struct request *rq, void *priv, - bool reserved) +static bool blk_mq_check_inflight(struct request *rq, void *priv) { struct mq_inflight *mi = priv; @@ -1400,8 +1399,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, } EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list); -static bool blk_mq_rq_inflight(struct request *rq, void *priv, - bool reserved) +static bool blk_mq_rq_inflight(struct request *rq, void *priv) { /* * If we find a request that isn't idle we know the queue is busy @@ -1470,7 +1468,7 @@ void blk_mq_put_rq_ref(struct request *rq) __blk_mq_free_request(rq); } -static bool blk_mq_check_expired(struct request *rq, void *priv, bool reserved) +static bool blk_mq_check_expired(struct request *rq, void *priv) { unsigned long *next = priv; @@ -3289,7 +3287,7 @@ struct rq_iter_data { bool has_rq; }; -static bool blk_mq_has_request(struct request *rq, void *data, bool reserved) 
+static bool blk_mq_has_request(struct request *rq, void *data) { struct rq_iter_data *iter_data = data; diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 5073cb407500..562725d222a7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2441,7 +2441,7 @@ static void mtip_softirq_done_fn(struct request *rq) blk_mq_end_request(rq, cmd->status); } -static bool mtip_abort_cmd(struct request *req, void *data, bool reserved) +static bool mtip_abort_cmd(struct request *req, void *data) { struct mtip_cmd *cmd = blk_mq_rq_to_pdu(req); struct driver_data *dd = data; @@ -2454,7 +2454,7 @@ static bool mtip_abort_cmd(struct request *req, void *data, bool reserved) return true; } -static bool mtip_queue_cmd(struct request *req, void *data, bool reserved) +static bool mtip_queue_cmd(struct request *req, void *data) { struct driver_data *dd = data; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 028f23c965df..f5d098a148cb 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -879,7 +879,7 @@ static void recv_work(struct work_struct *work) kfree(args); } -static bool nbd_clear_req(struct request *req, void *data, bool reserved) +static bool nbd_clear_req(struct request *req, void *data) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 6058abf42ba7..7720ea270ed8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1282,8 +1282,7 @@ struct srp_terminate_context { int scsi_result; }; -static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr, - bool reserved) +static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr) { struct srp_terminate_context *context = context_ptr; struct srp_target_port *target = context->srp_target; diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b5b24998a5ab..9031d10c97dc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -418,7 +418,7 @@ blk_status_t nvme_host_path_error(struct request *req) } EXPORT_SYMBOL_GPL(nvme_host_path_error); -bool nvme_cancel_request(struct request *req, void *data, bool reserved) +bool nvme_cancel_request(struct request *req, void *data) { dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device, "Cancelling I/O %d", req->tag); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 07fd6db5869c..9987797620b6 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2456,8 +2456,7 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) * status. The done path will return the io request back to the block * layer with an error status. 
*/ -static bool -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +static bool nvme_fc_terminate_exchange(struct request *req, void *data) { struct nvme_ctrl *nctrl = data; struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0da94b233fed..e4daa57f8bd5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -697,7 +697,7 @@ static __always_inline void nvme_complete_batch(struct io_comp_batch *iob, } blk_status_t nvme_host_path_error(struct request *req); -bool nvme_cancel_request(struct request *req, void *data, bool reserved); +bool nvme_cancel_request(struct request *req, void *data); void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 940a6deab38f..bd99c5492b7d 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -272,7 +272,7 @@ static void aac_queue_init(struct aac_dev * dev, struct aac_queue * q, u32 *mem, q->entries = qsize; } -static bool wait_for_io_iter(struct scsi_cmnd *cmd, void *data, bool rsvd) +static bool wait_for_io_iter(struct scsi_cmnd *cmd, void *data) { int *active = data; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 9c27bc37e5de..5ba5c18b77b4 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -633,7 +633,7 @@ struct fib_count_data { int krlcnt; }; -static bool fib_count_iter(struct scsi_cmnd *scmnd, void *data, bool reserved) +static bool fib_count_iter(struct scsi_cmnd *scmnd, void *data) { struct fib_count_data *fib_count = data; diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index e7b7f6d73429..77a4d9f8aa83 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -1350,8 +1350,7 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do) return wq_work_done; } -static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data, - bool reserved) +static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data) { const int tag = scsi_cmd_to_rq(sc)->tag; struct fnic *fnic = data; @@ -1548,8 +1547,7 @@ struct fnic_rport_abort_io_iter_data { int term_cnt; }; -static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data, - bool reserved) +static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data) { struct fnic_rport_abort_io_iter_data *iter_data = data; struct fnic *fnic = iter_data->fnic; @@ -2003,8 +2001,7 @@ struct fnic_pending_aborts_iter_data { int ret; }; -static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, - void *data, bool reserved) +static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc, void *data) { struct fnic_pending_aborts_iter_data *iter_data = data; struct fnic *fnic = iter_data->fnic; @@ -2668,8 +2665,7 @@ call_fc_exch_mgr_reset: } -static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data, - bool reserved) +static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data) { struct fnic_pending_aborts_iter_data *iter_data = data; struct fnic *fnic = iter_data->fnic; diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 8352f90d997d..315c7ac730e9 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -566,8 +566,7 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost) } EXPORT_SYMBOL(scsi_host_get); -static bool 
scsi_host_check_in_flight(struct request *rq, void *data, - bool reserved) +static bool scsi_host_check_in_flight(struct request *rq, void *data) { int *count = data; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); @@ -662,7 +661,7 @@ void scsi_flush_work(struct Scsi_Host *shost) } EXPORT_SYMBOL_GPL(scsi_flush_work); -static bool complete_all_cmds_iter(struct request *rq, void *data, bool rsvd) +static bool complete_all_cmds_iter(struct request *rq, void *data) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); enum scsi_host_status status = *(enum scsi_host_status *)data; @@ -693,17 +692,16 @@ void scsi_host_complete_all_commands(struct Scsi_Host *shost, EXPORT_SYMBOL_GPL(scsi_host_complete_all_commands); struct scsi_host_busy_iter_data { - bool (*fn)(struct scsi_cmnd *, void *, bool); + bool (*fn)(struct scsi_cmnd *, void *); void *priv; }; -static bool __scsi_host_busy_iter_fn(struct request *req, void *priv, - bool reserved) +static bool __scsi_host_busy_iter_fn(struct request *req, void *priv) { struct scsi_host_busy_iter_data *iter_data = priv; struct scsi_cmnd *sc = blk_mq_rq_to_pdu(req); - return iter_data->fn(sc, iter_data->priv, reserved); + return iter_data->fn(sc, iter_data->priv); } /** @@ -716,7 +714,7 @@ static bool __scsi_host_busy_iter_fn(struct request *req, void *priv, * ithas to be provided by the caller **/ void scsi_host_busy_iter(struct Scsi_Host *shost, - bool (*fn)(struct scsi_cmnd *, void *, bool), + bool (*fn)(struct scsi_cmnd *, void *), void *priv) { struct scsi_host_busy_iter_data iter_data = { diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index d8c195b7ca57..59a18769a4fe 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -381,14 +381,12 @@ void mpi3mr_invalidate_devhandles(struct mpi3mr_ioc *mrioc) * mpi3mr_print_scmd - print individual SCSI command * @rq: Block request * @data: Adapter instance reference - * @reserved: N/A. Currently not used * * Print the SCSI command details if it is in LLD scope. * * Return: true always. */ -static bool mpi3mr_print_scmd(struct request *rq, - void *data, bool reserved) +static bool mpi3mr_print_scmd(struct request *rq, void *data) { struct mpi3mr_ioc *mrioc = (struct mpi3mr_ioc *)data; struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); @@ -412,7 +410,6 @@ out: * mpi3mr_flush_scmd - Flush individual SCSI command * @rq: Block request * @data: Adapter instance reference - * @reserved: N/A. Currently not used * * Return the SCSI command to the upper layers if it is in LLD * scope. @@ -420,8 +417,7 @@ out: * Return: true always. */ -static bool mpi3mr_flush_scmd(struct request *rq, - void *data, bool reserved) +static bool mpi3mr_flush_scmd(struct request *rq, void *data) { struct mpi3mr_ioc *mrioc = (struct mpi3mr_ioc *)data; struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); @@ -451,7 +447,6 @@ out: * mpi3mr_count_dev_pending - Count commands pending for a lun * @rq: Block request * @data: SCSI device reference - * @reserved: Unused * * This is an iterator function called for each SCSI command in * a host and if the command is pending in the LLD for the @@ -461,8 +456,7 @@ out: * Return: true always. 
*/ -static bool mpi3mr_count_dev_pending(struct request *rq, - void *data, bool reserved) +static bool mpi3mr_count_dev_pending(struct request *rq, void *data) { struct scsi_device *sdev = (struct scsi_device *)data; struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; @@ -485,7 +479,6 @@ out: * mpi3mr_count_tgt_pending - Count commands pending for target * @rq: Block request * @data: SCSI target reference - * @reserved: Unused * * This is an iterator function called for each SCSI command in * a host and if the command is pending in the LLD for the @@ -495,8 +488,7 @@ out: * Return: true always. */ -static bool mpi3mr_count_tgt_pending(struct request *rq, - void *data, bool reserved) +static bool mpi3mr_count_tgt_pending(struct request *rq, void *data) { struct scsi_target *starget = (struct scsi_target *)data; struct mpi3mr_stgt_priv_data *stgt_priv_data = starget->hostdata; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c84c56d296fe..810a24884f7e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -520,7 +520,7 @@ struct blk_mq_queue_data { bool last; }; -typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); +typedef bool (busy_tag_iter_fn)(struct request *, void *); /** * struct blk_mq_ops - Callback functions that implements block driver diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 667d889b92b5..65082ecdd557 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -786,7 +786,7 @@ extern int scsi_host_block(struct Scsi_Host *shost); extern int scsi_host_unblock(struct Scsi_Host *shost, int new_state); void scsi_host_busy_iter(struct Scsi_Host *, - bool (*fn)(struct scsi_cmnd *, void *, bool), void *priv); + bool (*fn)(struct scsi_cmnd *, void *), void *priv); struct class_container; From 4cf6e6c0106bf6e6d034fa6043b4428ac2f267fc Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 6 Jul 2022 20:03:54 +0800 Subject: [PATCH 058/178] blk-mq: Drop local variable for reserved tag The local variable is now only referenced once so drop it. Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Martin K. 
Petersen Link: https://lore.kernel.org/r/1657109034-206040-7-git-send-email-john.garry@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 4e9b8ec55bda..8e3b36d1cb57 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -262,7 +262,6 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) struct blk_mq_hw_ctx *hctx = iter_data->hctx; struct request_queue *q = iter_data->q; struct blk_mq_tag_set *set = q->tag_set; - bool reserved = iter_data->reserved; struct blk_mq_tags *tags; struct request *rq; bool ret = true; @@ -272,7 +271,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) else tags = hctx->tags; - if (!reserved) + if (!iter_data->reserved) bitnr += tags->nr_reserved_tags; /* * We can hit rq == NULL here, because the tagging functions @@ -333,12 +332,11 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data) { struct bt_tags_iter_data *iter_data = data; struct blk_mq_tags *tags = iter_data->tags; - bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED; struct request *rq; bool ret = true; bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS); - if (!reserved) + if (!(iter_data->flags & BT_TAG_ITER_RESERVED)) bitnr += tags->nr_reserved_tags; /* From f1a8bbd1100d9cd117bc8b7fc0903982bbaf474f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:35 +0200 Subject: [PATCH 059/178] block: remove a superflous ifdef in blkdev.h It doesn't hurt to always have the blk_zone_cond_str prototype, and the two inlines can also be defined unconditionally. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b9a94c53c6cd..270cd0c55292 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -899,8 +899,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) return bdev->bd_queue; /* this is never NULL */ } -#ifdef CONFIG_BLK_DEV_ZONED - /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); @@ -915,7 +913,6 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio) return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), bio->bi_iter.bi_sector); } -#endif /* CONFIG_BLK_DEV_ZONED */ /* * Return how much of the chunk is left to be used for I/O at a given offset. From 6cc37a672a1e21245b931722a016b3bd4ae10e2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:36 +0200 Subject: [PATCH 060/178] block: call blk_queue_free_zone_bitmaps from disk_release The zone bitmaps are only used for non-passthrough I/O, so free them as soon as the disk is released. 
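With that, the relevant part of disk_release() ends up looking like this
(sketch of the post-patch function, per the genhd.c hunk below):

	static void disk_release(struct device *dev)
	{
		struct gendisk *disk = dev_to_disk(dev);

		disk_release_events(disk);
		kfree(disk->random);
		blk_queue_free_zone_bitmaps(disk->queue); /* moved from blk_release_queue() */
		xa_destroy(&disk->part_tbl);
		/* remainder of the teardown is unchanged */
	}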
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 2 -- block/genhd.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 58cb9cb9f48c..7590810cf13f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -776,8 +776,6 @@ static void blk_release_queue(struct kobject *kobj) blk_free_queue_stats(q->stats); kfree(q->poll_stat); - blk_queue_free_zone_bitmaps(q); - if (queue_is_mq(q)) blk_mq_release(q); diff --git a/block/genhd.c b/block/genhd.c index b1fb7e058b9c..d0bdeb93e922 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1165,6 +1165,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); + blk_queue_free_zone_bitmaps(disk->queue); xa_destroy(&disk->part_tbl); disk->queue->disk = NULL; From edd1dbc83b1de3b98590b76e09b86ddf6887fce7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:37 +0200 Subject: [PATCH 061/178] block: use bdev_is_zoned instead of open coding it Use bdev_is_zoned in all places where a block_device is available instead of open coding it. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-4-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 2 +- block/blk-core.c | 6 +++--- block/blk-mq.h | 2 +- block/blk-zoned.c | 9 ++++----- drivers/md/dm-table.c | 2 +- drivers/md/dm-zone.c | 2 +- drivers/md/dm.c | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) diff --git a/block/bio.c b/block/bio.c index 933ea3210954..888ee81ea303 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1033,7 +1033,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page, if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_ZONE_APPEND)) return 0; - if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) + if (WARN_ON_ONCE(!bdev_is_zoned(bio->bi_bdev))) return 0; return bio_add_hw_page(q, bio, page, len, offset, diff --git a/block/blk-core.c b/block/blk-core.c index 5ad7bd93077c..6bcca0b686de 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -569,7 +569,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, int nr_sectors = bio_sectors(bio); /* Only applicable to zoned block devices */ - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(bio->bi_bdev)) return BLK_STS_NOTSUPP; /* The bio sector must point to the start of a sequential zone */ @@ -775,11 +775,11 @@ void submit_bio_noacct(struct bio *bio) case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(bio->bi_bdev)) goto not_supported; break; case REQ_OP_ZONE_RESET_ALL: - if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q)) + if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q)) goto not_supported; break; case REQ_OP_WRITE_ZEROES: diff --git a/block/blk-mq.h b/block/blk-mq.h index 54e20edf0da3..31d75a83a562 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -317,7 +317,7 @@ static inline struct blk_plug *blk_mq_plug(struct request_queue *q, * For regular block devices or read operations, use the context plug * which may be NULL if blk_start_plug() was not executed. 
*/ - if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio))) + if (!bdev_is_zoned(bio->bi_bdev) || !op_is_write(bio_op(bio))) return current->plug; /* Zoned block device write operation case: do not plug the BIO */ diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 38cd840d8838..90a5c9cc80ab 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -149,8 +149,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); - if (!blk_queue_is_zoned(bdev_get_queue(bdev)) || - WARN_ON_ONCE(!disk->fops->report_zones)) + if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; if (!nr_zones || sector >= capacity) @@ -268,7 +267,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, struct bio *bio = NULL; int ret = 0; - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(bdev)) return -EOPNOTSUPP; if (bdev_read_only(bdev)) @@ -350,7 +349,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!q) return -ENXIO; - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(bdev)) return -ENOTTY; if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report))) @@ -408,7 +407,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, if (!q) return -ENXIO; - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(bdev)) return -ENOTTY; if (!(mode & FMODE_WRITE)) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index bd539afbfe88..b36b528e56cf 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1623,7 +1623,7 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev * struct request_queue *q = bdev_get_queue(dev->bdev); unsigned int *zone_sectors = data; - if (!blk_queue_is_zoned(q)) + if (!bdev_is_zoned(dev->bdev)) return 0; return blk_queue_zone_sectors(q) != *zone_sectors; diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 3e7b1fe1580b..ae616b87c91a 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -270,7 +270,7 @@ static int device_not_zone_append_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - return !blk_queue_is_zoned(bdev_get_queue(dev->bdev)); + return !bdev_is_zoned(dev->bdev); } static bool dm_table_supports_zone_append(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8872f9c63688..33d3799bb66e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1033,7 +1033,7 @@ static void clone_endio(struct bio *bio) } if (static_branch_unlikely(&zoned_enabled) && - unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev)))) + unlikely(bdev_is_zoned(bio->bi_bdev))) dm_zone_endio(io, bio); if (endio) { From 6deacb3bfac2b720e707c566549a7041f17db9c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:38 +0200 Subject: [PATCH 062/178] block: simplify blk_mq_plug Drop the unused q argument, and invert the check to move the exception into a branch and the regular path as the normal return. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-5-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-merge.c | 2 +- block/blk-mq.c | 2 +- block/blk-mq.h | 18 ++++++++---------- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 6bcca0b686de..bc16e9bae2dc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -719,7 +719,7 @@ void submit_bio_noacct(struct bio *bio) might_sleep(); - plug = blk_mq_plug(q, bio); + plug = blk_mq_plug(bio); if (plug && plug->nowait) bio->bi_opf |= REQ_NOWAIT; diff --git a/block/blk-merge.c b/block/blk-merge.c index 0f5f42ebd0bb..5abf5aa5a5f0 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -1051,7 +1051,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, struct blk_plug *plug; struct request *rq; - plug = blk_mq_plug(q, bio); + plug = blk_mq_plug(bio); if (!plug || rq_list_empty(plug->mq_list)) return false; diff --git a/block/blk-mq.c b/block/blk-mq.c index 63385742b8a8..f1b84e20b1a9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2808,7 +2808,7 @@ static void bio_set_ioprio(struct bio *bio) void blk_mq_submit_bio(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); - struct blk_plug *plug = blk_mq_plug(q, bio); + struct blk_plug *plug = blk_mq_plug(bio); const int is_sync = op_is_sync(bio->bi_opf); struct request *rq; unsigned int nr_segs = 1; diff --git a/block/blk-mq.h b/block/blk-mq.h index 31d75a83a562..e694ec67d646 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -294,7 +294,6 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) /* * blk_mq_plug() - Get caller context plug - * @q: request queue * @bio : the bio being submitted by the caller context * * Plugging, by design, may delay the insertion of BIOs into the elevator in @@ -305,23 +304,22 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap) * order. While this is not a problem with regular block devices, this ordering * change can cause write BIO failures with zoned block devices as these * require sequential write patterns to zones. Prevent this from happening by - * ignoring the plug state of a BIO issuing context if the target request queue - * is for a zoned block device and the BIO to plug is a write operation. + * ignoring the plug state of a BIO issuing context if it is for a zoned block + * device and the BIO to plug is a write operation. * * Return current->plug if the bio can be plugged and NULL otherwise */ -static inline struct blk_plug *blk_mq_plug(struct request_queue *q, - struct bio *bio) +static inline struct blk_plug *blk_mq_plug( struct bio *bio) { + /* Zoned block device write operation case: do not plug the BIO */ + if (bdev_is_zoned(bio->bi_bdev) && op_is_write(bio_op(bio))) + return NULL; + /* * For regular block devices or read operations, use the context plug * which may be NULL if blk_start_plug() was not executed. 
*/ - if (!bdev_is_zoned(bio->bi_bdev) || !op_is_write(bio_op(bio))) - return current->plug; - - /* Zoned block device write operation case: do not plug the BIO */ - return NULL; + return current->plug; } /* Free all requests on the list */ From 052e545c9276f97e86368579fda32aa1ac017d51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:39 +0200 Subject: [PATCH 063/178] block: simplify blk_check_zone_append Use the bdev based helpers instead of open coding them. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-6-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index bc16e9bae2dc..b530ce7b370c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -565,7 +565,6 @@ static int blk_partition_remap(struct bio *bio) static inline blk_status_t blk_check_zone_append(struct request_queue *q, struct bio *bio) { - sector_t pos = bio->bi_iter.bi_sector; int nr_sectors = bio_sectors(bio); /* Only applicable to zoned block devices */ @@ -573,8 +572,8 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q, return BLK_STS_NOTSUPP; /* The bio sector must point to the start of a sequential zone */ - if (pos & (blk_queue_zone_sectors(q) - 1) || - !blk_queue_zone_is_seq(q, pos)) + if (bio->bi_iter.bi_sector & (bdev_zone_sectors(bio->bi_bdev) - 1) || + !bio_zone_is_seq(bio)) return BLK_STS_IOERR; /* From 6b2bd274744e6454ba7bbbe6a09b44866f2f414a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:40 +0200 Subject: [PATCH 064/178] block: pass a gendisk to blk_queue_set_zoned Prepare for storing the zone related field in struct gendisk instead of struct request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-7-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 9 +++++---- block/partitions/core.c | 2 +- drivers/block/null_blk/zoned.c | 2 +- drivers/nvme/host/zns.c | 2 +- drivers/scsi/sd.c | 6 +++--- drivers/scsi/sd_zbc.c | 2 +- include/linux/blkdev.h | 2 +- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 6ccceb421ed2..35b7bba306a8 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -893,18 +893,19 @@ static bool disk_has_partitions(struct gendisk *disk) } /** - * blk_queue_set_zoned - configure a disk queue zoned model. + * disk_set_zoned - configure the zoned model for a disk * @disk: the gendisk of the queue to configure * @model: the zoned model to set * - * Set the zoned model of the request queue of @disk according to @model. + * Set the zoned model of @disk to @model. + * * When @model is BLK_ZONED_HM (host managed), this should be called only * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option). * If @model specifies BLK_ZONED_HA (host aware), the effective model used * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions * on the disk. 
*/ -void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) +void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) { struct request_queue *q = disk->queue; @@ -948,7 +949,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) blk_queue_clear_zone_settings(q); } } -EXPORT_SYMBOL_GPL(blk_queue_set_zoned); +EXPORT_SYMBOL_GPL(disk_set_zoned); int bdev_alignment_offset(struct block_device *bdev) { diff --git a/block/partitions/core.c b/block/partitions/core.c index 7dc487f5b03c..1a45b1dd6491 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -330,7 +330,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, case BLK_ZONED_HA: pr_info("%s: disabling host aware zoned block device support due to partitions\n", disk->disk_name); - blk_queue_set_zoned(disk, BLK_ZONED_NONE); + disk_set_zoned(disk, BLK_ZONED_NONE); break; case BLK_ZONED_NONE: break; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 2fdd7b20c224..b47bbd114058 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -159,7 +159,7 @@ int null_register_zoned_dev(struct nullb *nullb) struct nullb_device *dev = nullb->dev; struct request_queue *q = nullb->q; - blk_queue_set_zoned(nullb->disk, BLK_ZONED_HM); + disk_set_zoned(nullb->disk, BLK_ZONED_HM); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE); diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 9f81beb4df4e..0ed15c2fd56d 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -109,7 +109,7 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) goto free_data; } - blk_queue_set_zoned(ns->disk, BLK_ZONED_HM); + disk_set_zoned(ns->disk, BLK_ZONED_HM); blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q); blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1); blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index cb587e488601..eb02d939dd44 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2934,15 +2934,15 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) if (sdkp->device->type == TYPE_ZBC) { /* Host-managed */ - blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HM); + disk_set_zoned(sdkp->disk, BLK_ZONED_HM); } else { sdkp->zoned = zoned; if (sdkp->zoned == 1) { /* Host-aware */ - blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HA); + disk_set_zoned(sdkp->disk, BLK_ZONED_HA); } else { /* Regular disk or drive managed disk */ - blk_queue_set_zoned(sdkp->disk, BLK_ZONED_NONE); + disk_set_zoned(sdkp->disk, BLK_ZONED_NONE); } } diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 6acc4f406eb8..0f5823b67468 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -929,7 +929,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]) /* * This can happen for a host aware disk with partitions. * The block device zone model was already cleared by - * blk_queue_set_zoned(). Only free the scsi disk zone + * disk_set_zoned(). Only free the scsi disk zone * information and exit early. 
*/ sd_zbc_free_zone_info(sdkp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 270cd0c55292..416faa013782 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -291,7 +291,7 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); +void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #ifdef CONFIG_BLK_DEV_ZONED From b3c72f8138b5f967a9fa527af84b35018897aba3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:41 +0200 Subject: [PATCH 065/178] block: pass a gendisk to blk_queue_clear_zone_settings Switch to a gendisk based API in preparation for moving all zone related fields from the request_queue to the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-8-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 2 +- block/blk-zoned.c | 4 +++- block/blk.h | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 35b7bba306a8..8bb9eef5310e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -946,7 +946,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model) blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); } else { - blk_queue_clear_zone_settings(q); + disk_clear_zone_settings(disk); } } EXPORT_SYMBOL_GPL(disk_set_zoned); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 90a5c9cc80ab..82a4fa89678c 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -622,8 +622,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk, } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); -void blk_queue_clear_zone_settings(struct request_queue *q) +void disk_clear_zone_settings(struct gendisk *disk) { + struct request_queue *q = disk->queue; + blk_mq_freeze_queue(q); blk_queue_free_zone_bitmaps(q); diff --git a/block/blk.h b/block/blk.h index 58ad50cacd2d..7482a3a441dd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -406,10 +406,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; } #ifdef CONFIG_BLK_DEV_ZONED void blk_queue_free_zone_bitmaps(struct request_queue *q); -void blk_queue_clear_zone_settings(struct request_queue *q); +void disk_clear_zone_settings(struct gendisk *disk); #else static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} -static inline void blk_queue_clear_zone_settings(struct request_queue *q) {} +static inline void disk_clear_zone_settings(struct gendisk *disk) {} #endif int blk_alloc_ext_minor(void); From 5d40066567a73a67ddb656ad118c6cfa1c4a6d71 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:42 +0200 Subject: [PATCH 066/178] block: pass a gendisk to blk_queue_free_zone_bitmaps Switch to a gendisk based API in preparation for moving all zone related fields from the request_queue to the gendisk. 
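The body keeps operating on the request_queue via a local variable, so only
the entry point changes (sketch, per the blk-zoned.c hunk below):

	void disk_clear_zone_settings(struct gendisk *disk)
	{
		struct request_queue *q = disk->queue;

		blk_mq_freeze_queue(q);
		/* ... bitmap freeing and flag clearing unchanged ... */
		blk_mq_unfreeze_queue(q);
	}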
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-9-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-zoned.c | 8 +++++--- block/blk.h | 4 ++-- block/genhd.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 82a4fa89678c..0d431394cf90 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -449,8 +449,10 @@ fail: return ret; } -void blk_queue_free_zone_bitmaps(struct request_queue *q) +void disk_free_zone_bitmaps(struct gendisk *disk) { + struct request_queue *q = disk->queue; + kfree(q->conv_zones_bitmap); q->conv_zones_bitmap = NULL; kfree(q->seq_zones_wlock); @@ -612,7 +614,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk, ret = 0; } else { pr_warn("%s: failed to revalidate zones\n", disk->disk_name); - blk_queue_free_zone_bitmaps(q); + disk_free_zone_bitmaps(disk); } blk_mq_unfreeze_queue(q); @@ -628,7 +630,7 @@ void disk_clear_zone_settings(struct gendisk *disk) blk_mq_freeze_queue(q); - blk_queue_free_zone_bitmaps(q); + disk_free_zone_bitmaps(disk); blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q); q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE; q->nr_zones = 0; diff --git a/block/blk.h b/block/blk.h index 7482a3a441dd..b71e22c97d77 100644 --- a/block/blk.h +++ b/block/blk.h @@ -405,10 +405,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; } #endif #ifdef CONFIG_BLK_DEV_ZONED -void blk_queue_free_zone_bitmaps(struct request_queue *q); +void disk_free_zone_bitmaps(struct gendisk *disk); void disk_clear_zone_settings(struct gendisk *disk); #else -static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} +static inline void disk_free_zone_bitmaps(struct gendisk *disk) {} static inline void disk_clear_zone_settings(struct gendisk *disk) {} #endif diff --git a/block/genhd.c b/block/genhd.c index d0bdeb93e922..9d30f159c59a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1165,7 +1165,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); - blk_queue_free_zone_bitmaps(disk->queue); + disk_free_zone_bitmaps(disk); xa_destroy(&disk->part_tbl); disk->queue->disk = NULL; From 1dc0172027b0aa09823b430e395b1116d2745f36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:43 +0200 Subject: [PATCH 067/178] block: remove queue_max_open_zones and queue_max_active_zones Always use the bdev based helpers instead. 
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-10-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 4 ++-- include/linux/blkdev.h | 37 ++++++++++--------------------------- 2 files changed, 12 insertions(+), 29 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 7590810cf13f..5ce72345ac66 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -330,12 +330,12 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page) static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page) { - return queue_var_show(queue_max_open_zones(q), page); + return queue_var_show(bdev_max_open_zones(q->disk->part0), page); } static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page) { - return queue_var_show(queue_max_active_zones(q), page); + return queue_var_show(bdev_max_active_zones(q->disk->part0), page); } static ssize_t queue_nomerges_show(struct request_queue *q, char *page) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 416faa013782..7d4105d23b0a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -702,21 +702,22 @@ static inline void blk_queue_max_open_zones(struct request_queue *q, q->max_open_zones = max_open_zones; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) -{ - return q->max_open_zones; -} - static inline void blk_queue_max_active_zones(struct request_queue *q, unsigned int max_active_zones) { q->max_active_zones = max_active_zones; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return q->max_active_zones; + return bdev->bd_disk->queue->max_open_zones; } + +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) +{ + return bdev->bd_disk->queue->max_active_zones; +} + #else /* CONFIG_BLK_DEV_ZONED */ static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -732,11 +733,11 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q, { return 0; } -static inline unsigned int queue_max_open_zones(const struct request_queue *q) +static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } -static inline unsigned int queue_max_active_zones(const struct request_queue *q) +static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; } @@ -1314,24 +1315,6 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) return 0; } -static inline unsigned int bdev_max_open_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_open_zones(q); - return 0; -} - -static inline unsigned int bdev_max_active_zones(struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return queue_max_active_zones(q); - return 0; -} - static inline int queue_dma_alignment(const struct request_queue *q) { return q ? q->dma_alignment : 511; From 982977df48179c8c690868f398051074e68eef0f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:44 +0200 Subject: [PATCH 068/178] block: pass a gendisk to blk_queue_max_open_zones and blk_queue_max_active_zones Switch to a gendisk based API in preparation for moving all zone related fields from the request_queue to the gendisk. 
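The limits set through these helpers surface in sysfs as
queue/max_open_zones and queue/max_active_zones, the attributes reworked in
the previous patch. A minimal sketch of reading them from user space; the
device name nvme0n1 is an assumption, substitute any zoned disk:

#include <stdio.h>

int main(void)
{
	const char *attrs[] = {
		"/sys/block/nvme0n1/queue/max_open_zones",
		"/sys/block/nvme0n1/queue/max_active_zones",
	};
	unsigned int v;

	for (int i = 0; i < 2; i++) {
		FILE *f = fopen(attrs[i], "r");

		/* 0 means the device reports no limit */
		if (f && fscanf(f, "%u", &v) == 1)
			printf("%s: %u\n", attrs[i], v);
		if (f)
			fclose(f);
	}
	return 0;
}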
Signed-off-by: Christoph Hellwig
Reviewed-by: Chaitanya Kulkarni
Reviewed-by: Damien Le Moal
Reviewed-by: Johannes Thumshirn
Link: https://lore.kernel.org/r/20220706070350.1703384-11-hch@lst.de
Signed-off-by: Jens Axboe
---
 drivers/block/null_blk/zoned.c | 4 ++--
 drivers/nvme/host/zns.c        | 4 ++--
 drivers/scsi/sd_zbc.c          | 6 +++---
 include/linux/blkdev.h         | 8 ++++----
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index b47bbd114058..576ab3ed082a 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -174,8 +174,8 @@ int null_register_zoned_dev(struct nullb *nullb)
 	}

 	blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
-	blk_queue_max_open_zones(q, dev->zone_max_open);
-	blk_queue_max_active_zones(q, dev->zone_max_active);
+	disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
+	disk_set_max_active_zones(nullb->disk, dev->zone_max_active);

 	return 0;
 }
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 0ed15c2fd56d..12316ab51bda 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -111,8 +111,8 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)

 	disk_set_zoned(ns->disk, BLK_ZONED_HM);
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
-	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
-	blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
+	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
+	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
 free_data:
 	kfree(id);
 	return status;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 0f5823b67468..b4106f899734 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -950,10 +950,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
 	if (sdkp->zones_max_open == U32_MAX)
-		blk_queue_max_open_zones(q, 0);
+		disk_set_max_open_zones(disk, 0);
 	else
-		blk_queue_max_open_zones(q, sdkp->zones_max_open);
-	blk_queue_max_active_zones(q, 0);
+		disk_set_max_open_zones(disk, sdkp->zones_max_open);
+	disk_set_max_active_zones(disk, 0);
 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);

 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7d4105d23b0a..c05e1cc05c26 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -696,16 +696,16 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 	return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap);
 }

-static inline void blk_queue_max_open_zones(struct request_queue *q,
+static inline void disk_set_max_open_zones(struct gendisk *disk,
 		unsigned int max_open_zones)
 {
-	q->max_open_zones = max_open_zones;
+	disk->queue->max_open_zones = max_open_zones;
 }

-static inline void blk_queue_max_active_zones(struct request_queue *q,
+static inline void disk_set_max_active_zones(struct gendisk *disk,
 		unsigned int max_active_zones)
 {
-	q->max_active_zones = max_active_zones;
+	disk->queue->max_active_zones = max_active_zones;
 }

 static inline unsigned int bdev_max_open_zones(struct block_device *bdev)
 {

From b623e347323f6464b20fb0d899a0a73522ed8f6c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 6 Jul 2022 09:03:45 +0200
Subject: [PATCH 069/178] block: replace blkdev_nr_zones with bdev_nr_zones

Pass a block_device instead of a gendisk as that is what most callers
have at hand.
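The user-visible counterpart of bdev_nr_zones() is the BLKGETNRZONES ioctl
this patch updates in block/ioctl.c. As a hedged illustration, not part of
the patch itself, querying the zone count and zone size from user space
looks roughly like this (error handling trimmed):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>	/* BLKGETZONESZ, BLKGETNRZONES */

int main(int argc, char **argv)
{
	unsigned int nr_zones = 0, zone_sectors = 0;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	/* both report 0 for non-zoned devices, matching bdev_nr_zones() */
	if (ioctl(fd, BLKGETZONESZ, &zone_sectors) == 0 &&
	    ioctl(fd, BLKGETNRZONES, &nr_zones) == 0)
		printf("%u zones of %u sectors\n", nr_zones, zone_sectors);
	close(fd);
	return 0;
}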
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Acked-by: Damien Le Moal Link: https://lore.kernel.org/r/20220706070350.1703384-12-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-zoned.c | 15 ++++++++------- block/ioctl.c | 2 +- drivers/block/null_blk/zoned.c | 2 +- drivers/md/dm-zone.c | 2 +- drivers/md/dm-zoned-target.c | 5 ++--- drivers/nvme/target/zns.c | 6 +++--- fs/zonefs/super.c | 17 ++++++++--------- include/linux/blkdev.h | 4 ++-- 8 files changed, 26 insertions(+), 27 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 0d431394cf90..2dec25d8aa3b 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -108,21 +108,22 @@ void __blk_req_zone_write_unlock(struct request *rq) EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); /** - * blkdev_nr_zones - Get number of zones - * @disk: Target gendisk + * bdev_nr_zones - Get number of zones + * @bdev: Target device * * Return the total number of zones of a zoned block device. For a block * device without zone capabilities, the number of zones is always 0. */ -unsigned int blkdev_nr_zones(struct gendisk *disk) +unsigned int bdev_nr_zones(struct block_device *bdev) { - sector_t zone_sectors = blk_queue_zone_sectors(disk->queue); + sector_t zone_sectors = bdev_zone_sectors(bdev); - if (!blk_queue_is_zoned(disk->queue)) + if (!bdev_is_zoned(bdev)) return 0; - return (get_capacity(disk) + zone_sectors - 1) >> ilog2(zone_sectors); + return (bdev_nr_sectors(bdev) + zone_sectors - 1) >> + ilog2(zone_sectors); } -EXPORT_SYMBOL_GPL(blkdev_nr_zones); +EXPORT_SYMBOL_GPL(bdev_nr_zones); /** * blkdev_report_zones - Get zones information diff --git a/block/ioctl.c b/block/ioctl.c index 46949f1b0dba..60121e89052b 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -495,7 +495,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, case BLKGETZONESZ: return put_uint(argp, bdev_zone_sectors(bdev)); case BLKGETNRZONES: - return put_uint(argp, blkdev_nr_zones(bdev->bd_disk)); + return put_uint(argp, bdev_nr_zones(bdev)); case BLKROGET: return put_int(argp, bdev_read_only(bdev) != 0); case BLKSSZGET: /* get block device logical block size */ diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 576ab3ed082a..e62c52e96425 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -170,7 +170,7 @@ int null_register_zoned_dev(struct nullb *nullb) return ret; } else { blk_queue_chunk_sectors(q, dev->zone_size_sects); - q->nr_zones = blkdev_nr_zones(nullb->disk); + q->nr_zones = bdev_nr_zones(nullb->disk->part0); } blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index ae616b87c91a..6d105abe1241 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -301,7 +301,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q) * correct value to be exposed in sysfs queue/nr_zones. 
*/ WARN_ON_ONCE(queue_is_mq(q)); - q->nr_zones = blkdev_nr_zones(md->disk); + q->nr_zones = bdev_nr_zones(md->disk->part0); /* Check if zone append is natively supported */ if (dm_table_supports_zone_append(t)) { diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 0ec5d8b9b1a4..6ba6ef44b00e 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -793,8 +793,7 @@ static int dmz_fixup_devices(struct dm_target *ti) } zone_nr_sectors = blk_queue_zone_sectors(q); zoned_dev->zone_nr_sectors = zone_nr_sectors; - zoned_dev->nr_zones = - blkdev_nr_zones(zoned_dev->bdev->bd_disk); + zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev); } } else { reg_dev = NULL; @@ -805,7 +804,7 @@ static int dmz_fixup_devices(struct dm_target *ti) } q = bdev_get_queue(zoned_dev->bdev); zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q); - zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk); + zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev); } if (reg_dev) { diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 82b61acf7a72..c4c99b832daf 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -60,7 +60,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) if (ns->bdev->bd_disk->queue->conv_zones_bitmap) return false; - ret = blkdev_report_zones(ns->bdev, 0, blkdev_nr_zones(bd_disk), + ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev), validate_conv_zones_cb, NULL); if (ret < 0) return false; @@ -241,7 +241,7 @@ static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req) { unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba); - return blkdev_nr_zones(req->ns->bdev->bd_disk) - + return bdev_nr_zones(req->ns->bdev) - (sect >> ilog2(bdev_zone_sectors(req->ns->bdev))); } @@ -386,7 +386,7 @@ static int zmgmt_send_scan_cb(struct blk_zone *z, unsigned i, void *d) static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) { struct block_device *bdev = req->ns->bdev; - unsigned int nr_zones = blkdev_nr_zones(bdev->bd_disk); + unsigned int nr_zones = bdev_nr_zones(bdev); struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = NULL; sector_t sector = 0; diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 053299758deb..9c0eef1ff32a 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1394,7 +1394,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, { struct super_block *sb = parent->i_sb; - inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1; + inode->i_ino = bdev_nr_zones(sb->s_bdev) + type + 1; inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555); inode->i_op = &zonefs_dir_inode_operations; inode->i_fop = &simple_dir_operations; @@ -1540,7 +1540,7 @@ static int zonefs_create_zgroup(struct zonefs_zone_data *zd, /* * The first zone contains the super block: skip it. 
*/ - end = zd->zones + blkdev_nr_zones(sb->s_bdev->bd_disk); + end = zd->zones + bdev_nr_zones(sb->s_bdev); for (zone = &zd->zones[1]; zone < end; zone = next) { next = zone + 1; @@ -1635,8 +1635,8 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd) struct block_device *bdev = zd->sb->s_bdev; int ret; - zd->zones = kvcalloc(blkdev_nr_zones(bdev->bd_disk), - sizeof(struct blk_zone), GFP_KERNEL); + zd->zones = kvcalloc(bdev_nr_zones(bdev), sizeof(struct blk_zone), + GFP_KERNEL); if (!zd->zones) return -ENOMEM; @@ -1648,9 +1648,9 @@ static int zonefs_get_zone_info(struct zonefs_zone_data *zd) return ret; } - if (ret != blkdev_nr_zones(bdev->bd_disk)) { + if (ret != bdev_nr_zones(bdev)) { zonefs_err(zd->sb, "Invalid zone report (%d/%u zones)\n", - ret, blkdev_nr_zones(bdev->bd_disk)); + ret, bdev_nr_zones(bdev)); return -EIO; } @@ -1816,8 +1816,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) if (ret) goto cleanup; - zonefs_info(sb, "Mounting %u zones", - blkdev_nr_zones(sb->s_bdev->bd_disk)); + zonefs_info(sb, "Mounting %u zones", bdev_nr_zones(sb->s_bdev)); if (!sbi->s_max_wro_seq_files && !sbi->s_max_active_seq_files && @@ -1833,7 +1832,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) if (!inode) goto cleanup; - inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk); + inode->i_ino = bdev_nr_zones(sb->s_bdev); inode->i_mode = S_IFDIR | 0555; inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode); inode->i_op = &zonefs_dir_inode_operations; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c05e1cc05c26..fa2757ef4a84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -298,7 +298,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -unsigned int blkdev_nr_zones(struct gendisk *disk); +unsigned int bdev_nr_zones(struct block_device *bdev); extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); @@ -312,7 +312,7 @@ extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blkdev_nr_zones(struct gendisk *disk) +static inline unsigned int bdev_nr_zones(struct block_device *bdev) { return 0; } From 375c140c199ebd2866f9c50a0b8853ffca3f1b68 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:46 +0200 Subject: [PATCH 070/178] block: use bdev based helpers in blkdev_zone_mgmt{,all} Use the bdev based helpers instead of the queue based ones to clean up the code a bit and prepare for storing all zone related fields in struct gendisk. 
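blkdev_zone_mgmt() is also where the BLKRESETZONE ioctl ends up, so as a
usage illustration here is a hedged user-space sketch that resets a single
zone. It is destructive, so only run it against a disposable test device
(for example a zoned null_blk instance); the zone size is read with
BLKGETZONESZ first because nr_sectors must cover exactly one zone:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>	/* BLKRESETZONE, struct blk_zone_range */

int main(int argc, char **argv)
{
	struct blk_zone_range range;
	unsigned int zone_sectors = 0;
	int fd;

	if (argc != 3)
		return 1;
	fd = open(argv[1], O_RDWR);
	if (fd < 0 || ioctl(fd, BLKGETZONESZ, &zone_sectors) < 0)
		return 1;
	memset(&range, 0, sizeof(range));
	range.sector = strtoull(argv[2], NULL, 0);	/* zone start sector */
	range.nr_sectors = zone_sectors;
	/* routed through blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, ...) */
	if (ioctl(fd, BLKRESETZONE, &range) < 0)
		perror("BLKRESETZONE");
	close(fd);
	return 0;
}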
Signed-off-by: Christoph Hellwig
Reviewed-by: Chaitanya Kulkarni
Reviewed-by: Damien Le Moal
Reviewed-by: Johannes Thumshirn
Link: https://lore.kernel.org/r/20220706070350.1703384-13-hch@lst.de
Signed-off-by: Jens Axboe
---
 block/blk-zoned.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 2dec25d8aa3b..c2d8a38f449a 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -190,8 +190,8 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
 					  gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t capacity = get_capacity(bdev->bd_disk);
-	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	sector_t capacity = bdev_nr_sectors(bdev);
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
 	unsigned long *need_reset;
 	struct bio *bio = NULL;
 	sector_t sector = 0;
@@ -262,8 +262,8 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 		     gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	sector_t zone_sectors = blk_queue_zone_sectors(q);
-	sector_t capacity = get_capacity(bdev->bd_disk);
+	sector_t zone_sectors = bdev_zone_sectors(bdev);
+	sector_t capacity = bdev_nr_sectors(bdev);
 	sector_t end_sector = sector + nr_sectors;
 	struct bio *bio = NULL;
 	int ret = 0;

From a239145ad18b59338a2b6c419c1a15a0e52d1315 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 6 Jul 2022 09:03:47 +0200
Subject: [PATCH 071/178] nvmet: use bdev based helpers in
 nvmet_bdev_zone_mgmt_emulate_all

Use the bdev based helpers instead of the queue based ones to clean up
the code a bit and prepare for storing all zone related fields in
struct gendisk.

Signed-off-by: Christoph Hellwig
Reviewed-by: Damien Le Moal
Link: https://lore.kernel.org/r/20220706070350.1703384-14-hch@lst.de
Signed-off-by: Jens Axboe
---
 drivers/nvme/target/zns.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c
index c4c99b832daf..9d8717126ab3 100644
--- a/drivers/nvme/target/zns.c
+++ b/drivers/nvme/target/zns.c
@@ -413,7 +413,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
 		ret = 0;
 	}

-	while (sector < get_capacity(bdev->bd_disk)) {
+	while (sector < bdev_nr_sectors(bdev)) {
 		if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) {
 			bio = blk_next_bio(bio, bdev, 0,
 				zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC,
@@ -422,7 +422,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req)
 			/* This may take a while, so be nice to others */
 			cond_resched();
 		}
-		sector += blk_queue_zone_sectors(q);
+		sector += bdev_zone_sectors(bdev);
 	}

 	if (bio) {

From fabed68c272389db85655a2933737d602f4008fb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 6 Jul 2022 09:03:48 +0200
Subject: [PATCH 072/178] dm-zoned: cleanup dmz_fixup_devices

Use the bdev based helpers where applicable and move the zoned_dev into
the scope where it is actually used.
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-15-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-zoned-target.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 6ba6ef44b00e..95b132b52f33 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -764,8 +764,7 @@ static void dmz_put_zoned_device(struct dm_target *ti) static int dmz_fixup_devices(struct dm_target *ti) { struct dmz_target *dmz = ti->private; - struct dmz_dev *reg_dev, *zoned_dev; - struct request_queue *q; + struct dmz_dev *reg_dev = NULL; sector_t zone_nr_sectors = 0; int i; @@ -780,31 +779,32 @@ static int dmz_fixup_devices(struct dm_target *ti) return -EINVAL; } for (i = 1; i < dmz->nr_ddevs; i++) { - zoned_dev = &dmz->dev[i]; + struct dmz_dev *zoned_dev = &dmz->dev[i]; + struct block_device *bdev = zoned_dev->bdev; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { ti->error = "Secondary disk is not a zoned device"; return -EINVAL; } - q = bdev_get_queue(zoned_dev->bdev); if (zone_nr_sectors && - zone_nr_sectors != blk_queue_zone_sectors(q)) { + zone_nr_sectors != bdev_zone_sectors(bdev)) { ti->error = "Zone nr sectors mismatch"; return -EINVAL; } - zone_nr_sectors = blk_queue_zone_sectors(q); + zone_nr_sectors = bdev_zone_sectors(bdev); zoned_dev->zone_nr_sectors = zone_nr_sectors; - zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev); + zoned_dev->nr_zones = bdev_nr_zones(bdev); } } else { - reg_dev = NULL; - zoned_dev = &dmz->dev[0]; + struct dmz_dev *zoned_dev = &dmz->dev[0]; + struct block_device *bdev = zoned_dev->bdev; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { ti->error = "Disk is not a zoned device"; return -EINVAL; } - q = bdev_get_queue(zoned_dev->bdev); - zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q); - zoned_dev->nr_zones = bdev_nr_zones(zoned_dev->bdev); + zoned_dev->zone_nr_sectors = bdev_zone_sectors(bdev); + zoned_dev->nr_zones = bdev_nr_zones(bdev); } if (reg_dev) { From de71973c2951cb2ce4b46560f021f03b15906408 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:49 +0200 Subject: [PATCH 073/178] block: remove blk_queue_zone_sectors Always use bdev_zone_sectors instead. 
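Because zone sizes are a power of two in sectors, all the zone arithmetic
callers need reduces to shift and mask on the bdev_zone_sectors() value, as
the dm-zone.c hunks below do. A tiny standalone demonstration with made-up
numbers (524288 sectors is 256 MiB with 512-byte sectors):

#include <stdio.h>

int main(void)
{
	unsigned long long zone_sectors = 524288;	/* illustrative only */
	unsigned long long sector = 1100000;
	unsigned int shift = __builtin_ctzll(zone_sectors);	/* ilog2() */

	printf("zone %llu, in-zone offset %llu\n",
	       sector >> shift, sector & (zone_sectors - 1));
	return 0;
}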
Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-16-hch@lst.de Signed-off-by: Jens Axboe --- drivers/md/dm-table.c | 4 +--- drivers/md/dm-zone.c | 10 ++++++---- include/linux/blkdev.h | 11 +++-------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index b36b528e56cf..df904b7e95ce 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1620,13 +1620,11 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); unsigned int *zone_sectors = data; if (!bdev_is_zoned(dev->bdev)) return 0; - - return blk_queue_zone_sectors(q) != *zone_sectors; + return bdev_zone_sectors(dev->bdev) != *zone_sectors; } /* diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 6d105abe1241..842c31019b51 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -334,7 +334,7 @@ static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx, static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, unsigned int *wp_ofst) { - sector_t sector = zno * blk_queue_zone_sectors(md->queue); + sector_t sector = zno * bdev_zone_sectors(md->disk->part0); unsigned int noio_flag; struct dm_table *t; int srcu_idx, ret; @@ -373,7 +373,7 @@ struct orig_bio_details { static bool dm_zone_map_bio_begin(struct mapped_device *md, unsigned int zno, struct bio *clone) { - sector_t zsectors = blk_queue_zone_sectors(md->queue); + sector_t zsectors = bdev_zone_sectors(md->disk->part0); unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]); /* @@ -443,7 +443,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z return BLK_STS_OK; case REQ_OP_ZONE_FINISH: WRITE_ONCE(md->zwp_offset[zno], - blk_queue_zone_sectors(md->queue)); + bdev_zone_sectors(md->disk->part0)); return BLK_STS_OK; case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE: @@ -593,6 +593,7 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) { struct mapped_device *md = io->md; struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; struct bio *orig_bio = io->orig_bio; unsigned int zwp_offset; unsigned int zno; @@ -608,7 +609,8 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) */ if (clone->bi_status == BLK_STS_OK && bio_op(clone) == REQ_OP_ZONE_APPEND) { - sector_t mask = (sector_t)blk_queue_zone_sectors(q) - 1; + sector_t mask = + (sector_t)bdev_zone_sectors(disk->part0) - 1; orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fa2757ef4a84..21b97f7115dc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -667,11 +667,6 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } } -static inline sector_t blk_queue_zone_sectors(struct request_queue *q) -{ - return blk_queue_is_zoned(q) ? 
q->limits.chunk_sectors : 0; -} - #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_queue_nr_zones(struct request_queue *q) { @@ -1310,9 +1305,9 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - if (q) - return blk_queue_zone_sectors(q); - return 0; + if (!blk_queue_is_zoned(q)) + return 0; + return q->limits.chunk_sectors; } static inline int queue_dma_alignment(const struct request_queue *q) From d86e716aa40643e3eb8c69fab3a198146bf76dd6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Jul 2022 09:03:50 +0200 Subject: [PATCH 074/178] block: move zone related fields to struct gendisk Move the zone related fields that are currently stored in struct request_queue to struct gendisk as these are part of the highlevel block layer API and are only used for non-passthrough I/O that requires the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220706070350.1703384-17-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq-debugfs-zoned.c | 6 +-- block/blk-sysfs.c | 2 +- block/blk-zoned.c | 45 ++++++++--------- drivers/block/null_blk/zoned.c | 2 +- drivers/md/dm-zone.c | 74 +++++++++++++-------------- drivers/nvme/host/multipath.c | 2 +- drivers/nvme/target/zns.c | 4 +- drivers/scsi/sd_zbc.c | 2 +- include/linux/blk-mq.h | 8 +-- include/linux/blkdev.h | 91 ++++++++++++++++------------------ 10 files changed, 111 insertions(+), 125 deletions(-) diff --git a/block/blk-mq-debugfs-zoned.c b/block/blk-mq-debugfs-zoned.c index 038cb627c868..a77b099c34b7 100644 --- a/block/blk-mq-debugfs-zoned.c +++ b/block/blk-mq-debugfs-zoned.c @@ -11,11 +11,11 @@ int queue_zone_wlock_show(void *data, struct seq_file *m) struct request_queue *q = data; unsigned int i; - if (!q->seq_zones_wlock) + if (!q->disk->seq_zones_wlock) return 0; - for (i = 0; i < q->nr_zones; i++) - if (test_bit(i, q->seq_zones_wlock)) + for (i = 0; i < q->disk->nr_zones; i++) + if (test_bit(i, q->disk->seq_zones_wlock)) seq_printf(m, "%u\n", i); return 0; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 5ce72345ac66..c0303026752d 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -325,7 +325,7 @@ static ssize_t queue_zoned_show(struct request_queue *q, char *page) static ssize_t queue_nr_zones_show(struct request_queue *q, char *page) { - return queue_var_show(blk_queue_nr_zones(q), page); + return queue_var_show(disk_nr_zones(q->disk), page); } static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index c2d8a38f449a..7c017458d5ce 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -57,10 +57,10 @@ EXPORT_SYMBOL_GPL(blk_zone_cond_str); */ bool blk_req_needs_zone_write_lock(struct request *rq) { - if (!rq->q->seq_zones_wlock) + if (blk_rq_is_passthrough(rq)) return false; - if (blk_rq_is_passthrough(rq)) + if (!rq->q->disk->seq_zones_wlock) return false; switch (req_op(rq)) { @@ -77,7 +77,7 @@ bool blk_req_zone_write_trylock(struct request *rq) { unsigned int zno = blk_rq_zone_no(rq); - if (test_and_set_bit(zno, rq->q->seq_zones_wlock)) + if (test_and_set_bit(zno, rq->q->disk->seq_zones_wlock)) return false; WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); @@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock); void __blk_req_zone_write_lock(struct request *rq) { if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq), - 
rq->q->seq_zones_wlock))) + rq->q->disk->seq_zones_wlock))) return; WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED); @@ -101,9 +101,9 @@ EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock); void __blk_req_zone_write_unlock(struct request *rq) { rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED; - if (rq->q->seq_zones_wlock) + if (rq->q->disk->seq_zones_wlock) WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq), - rq->q->seq_zones_wlock)); + rq->q->disk->seq_zones_wlock)); } EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock); @@ -189,7 +189,7 @@ static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx, static int blkdev_zone_reset_all_emulated(struct block_device *bdev, gfp_t gfp_mask) { - struct request_queue *q = bdev_get_queue(bdev); + struct gendisk *disk = bdev->bd_disk; sector_t capacity = bdev_nr_sectors(bdev); sector_t zone_sectors = bdev_zone_sectors(bdev); unsigned long *need_reset; @@ -197,19 +197,18 @@ static int blkdev_zone_reset_all_emulated(struct block_device *bdev, sector_t sector = 0; int ret; - need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones); + need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones); if (!need_reset) return -ENOMEM; - ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0, - q->nr_zones, blk_zone_need_reset_cb, - need_reset); + ret = disk->fops->report_zones(disk, 0, disk->nr_zones, + blk_zone_need_reset_cb, need_reset); if (ret < 0) goto out_free_need_reset; ret = 0; while (sector < capacity) { - if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) { + if (!test_bit(disk_zone_no(disk, sector), need_reset)) { sector += zone_sectors; continue; } @@ -452,12 +451,10 @@ fail: void disk_free_zone_bitmaps(struct gendisk *disk) { - struct request_queue *q = disk->queue; - - kfree(q->conv_zones_bitmap); - q->conv_zones_bitmap = NULL; - kfree(q->seq_zones_wlock); - q->seq_zones_wlock = NULL; + kfree(disk->conv_zones_bitmap); + disk->conv_zones_bitmap = NULL; + kfree(disk->seq_zones_wlock); + disk->seq_zones_wlock = NULL; } struct blk_revalidate_zone_args { @@ -607,9 +604,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk, blk_mq_freeze_queue(q); if (ret > 0) { blk_queue_chunk_sectors(q, args.zone_sectors); - q->nr_zones = args.nr_zones; - swap(q->seq_zones_wlock, args.seq_zones_wlock); - swap(q->conv_zones_bitmap, args.conv_zones_bitmap); + disk->nr_zones = args.nr_zones; + swap(disk->seq_zones_wlock, args.seq_zones_wlock); + swap(disk->conv_zones_bitmap, args.conv_zones_bitmap); if (update_driver_data) update_driver_data(disk); ret = 0; @@ -634,9 +631,9 @@ void disk_clear_zone_settings(struct gendisk *disk) disk_free_zone_bitmaps(disk); blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q); q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE; - q->nr_zones = 0; - q->max_open_zones = 0; - q->max_active_zones = 0; + disk->nr_zones = 0; + disk->max_open_zones = 0; + disk->max_active_zones = 0; q->limits.chunk_sectors = 0; q->limits.zone_write_granularity = 0; q->limits.max_zone_append_sectors = 0; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index e62c52e96425..64b06caab984 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -170,7 +170,7 @@ int null_register_zoned_dev(struct nullb *nullb) return ret; } else { blk_queue_chunk_sectors(q, dev->zone_size_sects); - q->nr_zones = bdev_nr_zones(nullb->disk->part0); + nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0); } blk_queue_max_zone_append_sectors(q, dev->zone_size_sects); diff --git a/drivers/md/dm-zone.c 
b/drivers/md/dm-zone.c index 842c31019b51..2b89cde30c9e 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -139,13 +139,11 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) void dm_cleanup_zoned_dev(struct mapped_device *md) { - struct request_queue *q = md->queue; - - if (q) { - kfree(q->conv_zones_bitmap); - q->conv_zones_bitmap = NULL; - kfree(q->seq_zones_wlock); - q->seq_zones_wlock = NULL; + if (md->disk) { + kfree(md->disk->conv_zones_bitmap); + md->disk->conv_zones_bitmap = NULL; + kfree(md->disk->seq_zones_wlock); + md->disk->seq_zones_wlock = NULL; } kvfree(md->zwp_offset); @@ -179,31 +177,31 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct mapped_device *md = data; - struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; switch (zone->type) { case BLK_ZONE_TYPE_CONVENTIONAL: - if (!q->conv_zones_bitmap) { - q->conv_zones_bitmap = - kcalloc(BITS_TO_LONGS(q->nr_zones), + if (!disk->conv_zones_bitmap) { + disk->conv_zones_bitmap = + kcalloc(BITS_TO_LONGS(disk->nr_zones), sizeof(unsigned long), GFP_NOIO); - if (!q->conv_zones_bitmap) + if (!disk->conv_zones_bitmap) return -ENOMEM; } - set_bit(idx, q->conv_zones_bitmap); + set_bit(idx, disk->conv_zones_bitmap); break; case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: - if (!q->seq_zones_wlock) { - q->seq_zones_wlock = - kcalloc(BITS_TO_LONGS(q->nr_zones), + if (!disk->seq_zones_wlock) { + disk->seq_zones_wlock = + kcalloc(BITS_TO_LONGS(disk->nr_zones), sizeof(unsigned long), GFP_NOIO); - if (!q->seq_zones_wlock) + if (!disk->seq_zones_wlock) return -ENOMEM; } if (!md->zwp_offset) { md->zwp_offset = - kvcalloc(q->nr_zones, sizeof(unsigned int), + kvcalloc(disk->nr_zones, sizeof(unsigned int), GFP_KERNEL); if (!md->zwp_offset) return -ENOMEM; @@ -228,7 +226,7 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx, */ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) { - struct request_queue *q = md->queue; + struct gendisk *disk = md->disk; unsigned int noio_flag; int ret; @@ -236,7 +234,7 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) * Check if something changed. If yes, cleanup the current resources * and reallocate everything. */ - if (!q->nr_zones || q->nr_zones != md->nr_zones) + if (!disk->nr_zones || disk->nr_zones != md->nr_zones) dm_cleanup_zoned_dev(md); if (md->nr_zones) return 0; @@ -246,17 +244,17 @@ static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) * operations in this context are done as if GFP_NOIO was specified. */ noio_flag = memalloc_noio_save(); - ret = dm_blk_do_report_zones(md, t, 0, q->nr_zones, + ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones, dm_zone_revalidate_cb, md); memalloc_noio_restore(noio_flag); if (ret < 0) goto err; - if (ret != q->nr_zones) { + if (ret != disk->nr_zones) { ret = -EIO; goto err; } - md->nr_zones = q->nr_zones; + md->nr_zones = disk->nr_zones; return 0; @@ -301,7 +299,7 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q) * correct value to be exposed in sysfs queue/nr_zones. 
*/ WARN_ON_ONCE(queue_is_mq(q)); - q->nr_zones = bdev_nr_zones(md->disk->part0); + md->disk->nr_zones = bdev_nr_zones(md->disk->part0); /* Check if zone append is natively supported */ if (dm_table_supports_zone_append(t)) { @@ -466,26 +464,26 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int z } } -static inline void dm_zone_lock(struct request_queue *q, - unsigned int zno, struct bio *clone) +static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno, + struct bio *clone) { if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))) return; - wait_on_bit_lock_io(q->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE); + wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE); bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED); } -static inline void dm_zone_unlock(struct request_queue *q, - unsigned int zno, struct bio *clone) +static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno, + struct bio *clone) { if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)) return; - WARN_ON_ONCE(!test_bit(zno, q->seq_zones_wlock)); - clear_bit_unlock(zno, q->seq_zones_wlock); + WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock)); + clear_bit_unlock(zno, disk->seq_zones_wlock); smp_mb__after_atomic(); - wake_up_bit(q->seq_zones_wlock, zno); + wake_up_bit(disk->seq_zones_wlock, zno); bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED); } @@ -520,7 +518,6 @@ int dm_zone_map_bio(struct dm_target_io *tio) struct dm_io *io = tio->io; struct dm_target *ti = tio->ti; struct mapped_device *md = io->md; - struct request_queue *q = md->queue; struct bio *clone = &tio->clone; struct orig_bio_details orig_bio_details; unsigned int zno; @@ -536,7 +533,7 @@ int dm_zone_map_bio(struct dm_target_io *tio) /* Lock the target zone */ zno = bio_zone_no(clone); - dm_zone_lock(q, zno, clone); + dm_zone_lock(md->disk, zno, clone); orig_bio_details.nr_sectors = bio_sectors(clone); orig_bio_details.op = bio_op(clone); @@ -546,7 +543,7 @@ int dm_zone_map_bio(struct dm_target_io *tio) * both valid, and if the bio is a zone append, remap it to a write. 
*/ if (!dm_zone_map_bio_begin(md, zno, clone)) { - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); return DM_MAPIO_KILL; } @@ -570,12 +567,12 @@ int dm_zone_map_bio(struct dm_target_io *tio) sts = dm_zone_map_bio_end(md, zno, &orig_bio_details, *tio->len_ptr); if (sts != BLK_STS_OK) - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); break; case DM_MAPIO_REQUEUE: case DM_MAPIO_KILL: default: - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(md->disk, zno, clone); sts = BLK_STS_IOERR; break; } @@ -592,7 +589,6 @@ int dm_zone_map_bio(struct dm_target_io *tio) void dm_zone_endio(struct dm_io *io, struct bio *clone) { struct mapped_device *md = io->md; - struct request_queue *q = md->queue; struct gendisk *disk = md->disk; struct bio *orig_bio = io->orig_bio; unsigned int zwp_offset; @@ -651,5 +647,5 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone) zwp_offset - bio_sectors(orig_bio); } - dm_zone_unlock(q, zno, clone); + dm_zone_unlock(disk, zno, clone); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ccf9a6da8f6e..f26640ccb955 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -830,7 +830,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) ns->head->disk->queue); #ifdef CONFIG_BLK_DEV_ZONED if (blk_queue_is_zoned(ns->queue) && ns->head->disk) - ns->head->disk->queue->nr_zones = ns->queue->nr_zones; + ns->head->disk->nr_zones = ns->disk->nr_zones; #endif } diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index 9d8717126ab3..c0ee21fcab81 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -57,7 +57,7 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns) * zones, reject the device. Otherwise, use report zones to detect if * the device has conventional zones. 
*/ - if (ns->bdev->bd_disk->queue->conv_zones_bitmap) + if (ns->bdev->bd_disk->conv_zones_bitmap) return false; ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev), @@ -414,7 +414,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) } while (sector < bdev_nr_sectors(bdev)) { - if (test_bit(blk_queue_zone_no(q, sector), d.zbitmap)) { + if (test_bit(disk_zone_no(bdev->bd_disk, sector), d.zbitmap)) { bio = blk_next_bio(bio, bdev, 0, zsa_req_op(req->cmd->zms.zsa) | REQ_SYNC, GFP_KERNEL); diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index b4106f899734..b8c97456506a 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -855,7 +855,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) if (sdkp->zone_info.zone_blocks == zone_blocks && sdkp->zone_info.nr_zones == nr_zones && - disk->queue->nr_zones == nr_zones) + disk->nr_zones == nr_zones) goto unlock; flags = memalloc_noio_save(); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 810a24884f7e..d74f6a6b7e69 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -1129,12 +1129,12 @@ void blk_dump_rq_flags(struct request *, char *); #ifdef CONFIG_BLK_DEV_ZONED static inline unsigned int blk_rq_zone_no(struct request *rq) { - return blk_queue_zone_no(rq->q, blk_rq_pos(rq)); + return disk_zone_no(rq->q->disk, blk_rq_pos(rq)); } static inline unsigned int blk_rq_zone_is_seq(struct request *rq) { - return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq)); + return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq)); } bool blk_req_needs_zone_write_lock(struct request *rq); @@ -1156,8 +1156,8 @@ static inline void blk_req_zone_write_unlock(struct request *rq) static inline bool blk_req_zone_is_write_locked(struct request *rq) { - return rq->q->seq_zones_wlock && - test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock); + return rq->q->disk->seq_zones_wlock && + test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock); } static inline bool blk_req_can_dispatch_to_zone(struct request *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 21b97f7115dc..22c477fadc0f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -164,6 +164,29 @@ struct gendisk { #ifdef CONFIG_BLK_DEV_INTEGRITY struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ + +#ifdef CONFIG_BLK_DEV_ZONED + /* + * Zoned block device information for request dispatch control. + * nr_zones is the total number of zones of the device. This is always + * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones + * bits which indicates if a zone is conventional (bit set) or + * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones + * bits which indicates if a zone is write locked, that is, if a write + * request targeting the zone was dispatched. + * + * Reads of this information must be protected with blk_queue_enter() / + * blk_queue_exit(). Modifying this information is only allowed while + * no requests are being processed. See also blk_mq_freeze_queue() and + * blk_mq_unfreeze_queue(). + */ + unsigned int nr_zones; + unsigned int max_open_zones; + unsigned int max_active_zones; + unsigned long *conv_zones_bitmap; + unsigned long *seq_zones_wlock; +#endif /* CONFIG_BLK_DEV_ZONED */ + #if IS_ENABLED(CONFIG_CDROM) struct cdrom_device_info *cdi; #endif @@ -467,31 +490,6 @@ struct request_queue { unsigned int required_elevator_features; -#ifdef CONFIG_BLK_DEV_ZONED - /* - * Zoned block device information for request dispatch control. 
- * nr_zones is the total number of zones of the device. This is always - * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones - * bits which indicates if a zone is conventional (bit set) or - * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones - * bits which indicates if a zone is write locked, that is, if a write - * request targeting the zone was dispatched. All three fields are - * initialized by the low level device driver (e.g. scsi/sd.c). - * Stacking drivers (device mappers) may or may not initialize - * these fields. - * - * Reads of this information must be protected with blk_queue_enter() / - * blk_queue_exit(). Modifying this information is only allowed while - * no requests are being processed. See also blk_mq_freeze_queue() and - * blk_mq_unfreeze_queue(). - */ - unsigned int nr_zones; - unsigned long *conv_zones_bitmap; - unsigned long *seq_zones_wlock; - unsigned int max_open_zones; - unsigned int max_active_zones; -#endif /* CONFIG_BLK_DEV_ZONED */ - int node; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; @@ -668,63 +666,59 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } #ifdef CONFIG_BLK_DEV_ZONED -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { - return blk_queue_is_zoned(q) ? q->nr_zones : 0; + return blk_queue_is_zoned(disk->queue) ? disk->nr_zones : 0; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return 0; - return sector >> ilog2(q->limits.chunk_sectors); + return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { - if (!blk_queue_is_zoned(q)) + if (!blk_queue_is_zoned(disk->queue)) return false; - if (!q->conv_zones_bitmap) + if (!disk->conv_zones_bitmap) return true; - return !test_bit(blk_queue_zone_no(q, sector), q->conv_zones_bitmap); + return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap); } static inline void disk_set_max_open_zones(struct gendisk *disk, unsigned int max_open_zones) { - disk->queue->max_open_zones = max_open_zones; + disk->max_open_zones = max_open_zones; } static inline void disk_set_max_active_zones(struct gendisk *disk, unsigned int max_active_zones) { - disk->queue->max_active_zones = max_active_zones; + disk->max_active_zones = max_active_zones; } static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_open_zones; + return bdev->bd_disk->max_open_zones; } static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { - return bdev->bd_disk->queue->max_active_zones; + return bdev->bd_disk->max_active_zones; } #else /* CONFIG_BLK_DEV_ZONED */ -static inline unsigned int blk_queue_nr_zones(struct request_queue *q) +static inline unsigned int disk_nr_zones(struct gendisk *disk) { return 0; } -static inline bool blk_queue_zone_is_seq(struct request_queue *q, - sector_t sector) +static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector) { return false; } -static inline unsigned int blk_queue_zone_no(struct request_queue *q, - sector_t sector) +static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) { 
return 0; } @@ -732,6 +726,7 @@ static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return 0; } + static inline unsigned int bdev_max_active_zones(struct block_device *bdev) { return 0; @@ -900,14 +895,12 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond); static inline unsigned int bio_zone_no(struct bio *bio) { - return blk_queue_zone_no(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } static inline unsigned int bio_zone_is_seq(struct bio *bio) { - return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev), - bio->bi_iter.bi_sector); + return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector); } /* From f3ec5d11554778c24ac8915e847223ed71d104fc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 11 Jul 2022 17:08:08 +0800 Subject: [PATCH 075/178] blk-mq: don't create hctx debugfs dir until q->debugfs_dir is created blk_mq_debugfs_register_hctx() can be called by blk_mq_update_nr_hw_queues when gendisk isn't added yet, such as nvme tcp. Fixes the warning of 'debugfs: Directory 'hctx0' with parent '/' already present!' which can be observed reliably when running blktests nvme/005. Fixes: 6cfc0081b046 ("blk-mq: no need to check return value of debugfs_create functions") Reported-by: Yi Zhang Signed-off-by: Ming Lei Tested-by: Yi Zhang Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220711090808.259682-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b11add9a95e2..7ee1b13380d0 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -728,6 +728,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; + if (!q->debugfs_dir) + return; + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); From f4b1e27db49c8b985b116aa99481b4c6a4342ed4 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 12 Jul 2022 17:05:47 +0200 Subject: [PATCH 076/178] block/rq_qos: Use atomic_try_cmpxchg in atomic_inc_below Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in atomic_inc_below. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. 
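The same pattern exists in plain C11 atomics, which may help readers outside
the kernel: compare_exchange writes the observed value back into the
expected variable on failure, so the explicit reload and equality check drop
out of the loop. A hedged standalone analogue of atomic_inc_below():

#include <stdio.h>
#include <stdbool.h>
#include <stdatomic.h>

/* increment *v unless it has reached `below`; C11 sketch, not kernel code */
static bool inc_below(atomic_uint *v, unsigned int below)
{
	unsigned int cur = atomic_load(v);

	do {
		if (cur >= below)
			return false;
		/* on failure, `cur` is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(v, &cur, cur + 1));
	return true;
}

int main(void)
{
	atomic_uint v = 0;

	while (inc_below(&v, 3))
		;
	printf("v = %u\n", atomic_load(&v));	/* prints 3 */
	return 0;
}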
Signed-off-by: Uros Bizjak Cc: Jens Axboe Link: https://lore.kernel.org/r/20220712150547.5786-1-ubizjak@gmail.com Signed-off-by: Jens Axboe --- block/blk-rq-qos.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index d3a75693adbf..88f0fe7dcf54 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -10,16 +10,10 @@ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); - for (;;) { - unsigned int old; - + do { if (cur >= below) return false; - old = atomic_cmpxchg(v, cur, cur + 1); - if (old == cur) - break; - cur = old; - } + } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } From 939f9dd040fe1063d884f8f0f89b037093fe2341 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 12 Jul 2022 17:27:41 +0200 Subject: [PATCH 077/178] block: Use try_cmpxchg in update_io_ticks Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in update_io_ticks. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). No functional change intended. Signed-off-by: Uros Bizjak Cc: Jens Axboe Link: https://lore.kernel.org/r/20220712152741.7324-1-ubizjak@gmail.com Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index b530ce7b370c..8365996a8ef8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -943,7 +943,7 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end) again: stamp = READ_ONCE(part->bd_stamp); if (unlikely(time_after(now, stamp))) { - if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) + if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now))) __part_stat_add(part, io_ticks, end ? now - stamp : 1); } if (part->bd_partno) { From aee8960c2eae12636040dbf0f04e135273b1612d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 12 Jul 2022 17:19:47 +0200 Subject: [PATCH 078/178] blk-iolatency: Use atomic{,64}_try_cmpxchg Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in check_scale_change and atomic64_try_cmpxchg in blkcg_iolatency_done_bio. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). No functional change intended. Signed-off-by: Uros Bizjak Cc: Jens Axboe Link: https://lore.kernel.org/r/20220712151947.6783-1-ubizjak@gmail.com Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 9568bf8dfe82..79745c6d8e15 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -401,7 +401,6 @@ static void check_scale_change(struct iolatency_grp *iolat) unsigned int cur_cookie; unsigned int our_cookie = atomic_read(&iolat->scale_cookie); u64 scale_lat; - unsigned int old; int direction = 0; if (lat_to_blkg(iolat)->parent == NULL) @@ -422,11 +421,10 @@ static void check_scale_change(struct iolatency_grp *iolat) else return; - old = atomic_cmpxchg(&iolat->scale_cookie, our_cookie, cur_cookie); - - /* Somebody beat us to the punch, just bail. */ - if (old != our_cookie) + if (!atomic_try_cmpxchg(&iolat->scale_cookie, &our_cookie, cur_cookie)) { + /* Somebody beat us to the punch, just bail. 
*/ return; + } if (direction < 0 && iolat->min_lat_nsec) { u64 samples_thresh; @@ -633,8 +631,8 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio) window_start = atomic64_read(&iolat->window_start); if (now > window_start && (now - window_start) >= iolat->cur_win_nsec) { - if (atomic64_cmpxchg(&iolat->window_start, - window_start, now) == window_start) + if (atomic64_try_cmpxchg(&iolat->window_start, + &window_start, now)) iolatency_check_latencies(iolat, now); } } From 96388f57d2aad9836b2c589181fa1dbaba4066b4 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 12 Jul 2022 17:44:55 +0200 Subject: [PATCH 079/178] blk-cgroup: Use atomic{,64}_try_cmpxchg Use atomic_try_cmpxchg instead of atomic_cmpxchg (*ptr, old, new) == old in blkcg_unuse_delay, blkcg_set_delay and blkcg_clear_delay and atomic64_try_cmpxchg in blkcg_scale_delay. x86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_try_cmpxchg implicitly assigns old *ptr value to "old" when cmpxchg fails, enabling further code simplifications. No functional change intended. Signed-off-by: Uros Bizjak Cc: Jens Axboe Link: https://lore.kernel.org/r/20220712154455.66868-1-ubizjak@gmail.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-cgroup.h | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 27a2d0ca0c70..869af9d72bcf 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1700,7 +1700,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now) * everybody is happy with their IO latencies. */ if (time_before64(old + NSEC_PER_SEC, now) && - atomic64_cmpxchg(&blkg->delay_start, old, now) == old) { + atomic64_try_cmpxchg(&blkg->delay_start, &old, now)) { u64 cur = atomic64_read(&blkg->delay_nsec); u64 sub = min_t(u64, blkg->last_delay, now - old); int cur_use = atomic_read(&blkg->use_delay); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index d4de0a35e066..d2724d1dd7c9 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -430,12 +430,8 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg) * then check to see if we were the last delay so we can drop the * congestion count on the cgroup. */ - while (old) { - int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1); - if (cur == old) - break; - old = cur; - } + while (old && !atomic_try_cmpxchg(&blkg->use_delay, &old, old - 1)) + ; if (old == 0) return 0; @@ -458,7 +454,7 @@ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay) int old = atomic_read(&blkg->use_delay); /* We only want 1 person setting the congestion count for this blkg. */ - if (!old && atomic_cmpxchg(&blkg->use_delay, old, -1) == old) + if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1)) atomic_inc(&blkg->blkcg->css.cgroup->congestion_count); atomic64_set(&blkg->delay_nsec, delay); @@ -475,7 +471,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg) int old = atomic_read(&blkg->use_delay); /* We only want 1 person clearing the congestion count for this blkg. 
From 71f28f3136aff5890cd56de78abc673f8393cad9 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Wed, 13 Jul 2022 22:07:10 +0800
Subject: [PATCH 080/178] ublk_drv: add io_uring based userspace block driver

This is the driver part of the userspace block driver (ublk driver); the
other part is the userspace daemon (ublksrv) [1].

The two parts communicate via io_uring's IORING_OP_URING_CMD, with one
shared cmd buffer for storing io commands. The buffer is read-only for
ublksrv; each io command is written by the ublk driver and indexed
directly by io request tag.

For example, when one READ io request is submitted to the ublk block
driver, the driver stores the io command into the cmd buffer first, then
completes one IORING_OP_URING_CMD to notify ublksrv. That URING_CMD was
issued to the ublk driver beforehand by ublksrv to get notification of
any new io request, and each URING_CMD is associated with one io request
by tag.

After ublksrv gets the io command, it translates and handles the ublk io
request; for the ublk-loop target, for instance, ublksrv translates the
request into the same request on another file or disk, like the kernel
loop block driver. In ublksrv's implementation, the io is still handled
by io_uring and shares the same ring with the IORING_OP_URING_CMD
command. When the target io request is done, the same
IORING_OP_URING_CMD is issued to the ublk driver for both committing the
io request result and getting future notification of new io requests.

Another thing done by the ublk driver is to copy data between the kernel
io request and ublksrv's io buffer:

1) before ublksrv handles a WRITE request, copy the request's data into
ublksrv's userspace io buffer, so that ublksrv can handle the write
request

2) after ublksrv handles a READ request, copy ublksrv's userspace io
buffer into this READ request, then the ublk driver can complete the
READ request

Zero copy may be added later once mm is ready to support it.

The ublk driver doesn't handle any logic of the specific userspace
driver, so it stays small and simple.

[1] ublksrv
https://github.com/ming1/ubdsrv

Signed-off-by: Ming Lei
Link: https://lore.kernel.org/r/20220713140711.97356-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe
---
 drivers/block/Kconfig         |    9 +
 drivers/block/Makefile        |    2 +
 drivers/block/ublk_drv.c      | 1530 +++++++++++++++++++++++++++++++++
 include/uapi/linux/ublk_cmd.h |  156 ++++
 4 files changed, 1697 insertions(+)
 create mode 100644 drivers/block/ublk_drv.c
 create mode 100644 include/uapi/linux/ublk_cmd.h

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index fdb81f2794cd..e19fcab016ba 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -408,6 +408,15 @@ config BLK_DEV_RBD
 
 	  If unsure, say N.
 
+config BLK_DEV_UBLK
+	tristate "Userspace block driver (Experimental)"
+	select IO_URING
+	help
+	  io_uring based userspace block driver. Together with ublk server, ublk
+	  has been working well, but the interface with userspace and the command
+	  data definition aren't finalized yet and might change according to
+	  future requirements, so mark it as experimental for now.
+ source "drivers/block/rnbd/Kconfig" endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 934a9c7c3a7c..be631352567e 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -39,4 +39,6 @@ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ +obj-$(CONFIG_BLK_DEV_UBLK) += ublk_drv.o + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c new file mode 100644 index 000000000000..922a84c86fc6 --- /dev/null +++ b/drivers/block/ublk_drv.c @@ -0,0 +1,1530 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Userspace block device - block device which IO is handled from userspace + * + * Take full use of io_uring passthrough command for communicating with + * ublk userspace daemon(ublksrvd) for handling basic IO request. + * + * Copyright 2022 Ming Lei + * + * (part of code stolen from loop.c) + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UBLK_MINORS (1U << MINORBITS) + +struct ublk_uring_cmd_pdu { + struct request *req; +}; + +/* + * io command is active: sqe cmd is received, and its cqe isn't done + * + * If the flag is set, the io command is owned by ublk driver, and waited + * for incoming blk-mq request from the ublk block device. + * + * If the flag is cleared, the io command will be completed, and owned by + * ublk server. + */ +#define UBLK_IO_FLAG_ACTIVE 0x01 + +/* + * IO command is completed via cqe, and it is being handled by ublksrv, and + * not committed yet + * + * Basically exclusively with UBLK_IO_FLAG_ACTIVE, so can be served for + * cross verification + */ +#define UBLK_IO_FLAG_OWNED_BY_SRV 0x02 + +/* + * IO command is aborted, so this flag is set in case of + * !UBLK_IO_FLAG_ACTIVE. 
+ * + * After this flag is observed, any pending or new incoming request + * associated with this io command will be failed immediately + */ +#define UBLK_IO_FLAG_ABORTED 0x04 + +struct ublk_io { + /* userspace buffer address from io cmd */ + __u64 addr; + unsigned int flags; + int res; + + struct io_uring_cmd *cmd; +}; + +struct ublk_queue { + int q_id; + int q_depth; + + struct task_struct *ubq_daemon; + char *io_cmd_buf; + + unsigned long io_addr; /* mapped vm address */ + unsigned int max_io_sz; + bool abort_work_pending; + unsigned short nr_io_ready; /* how many ios setup */ + struct ublk_device *dev; + struct ublk_io ios[0]; +}; + +#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ) + +struct ublk_device { + struct gendisk *ub_disk; + struct request_queue *ub_queue; + + char *__queues; + + unsigned short queue_size; + unsigned short bs_shift; + struct ublksrv_ctrl_dev_info dev_info; + + struct blk_mq_tag_set tag_set; + + struct cdev cdev; + struct device cdev_dev; + + atomic_t ch_open_cnt; + int ub_number; + + struct mutex mutex; + + struct mm_struct *mm; + + struct completion completion; + unsigned int nr_queues_ready; + atomic_t nr_aborted_queues; + + /* + * Our ubq->daemon may be killed without any notification, so + * monitor each queue's daemon periodically + */ + struct delayed_work monitor_work; + struct work_struct stop_work; +}; + +static dev_t ublk_chr_devt; +static struct class *ublk_chr_class; + +static DEFINE_IDR(ublk_index_idr); +static DEFINE_SPINLOCK(ublk_idr_lock); +static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */ + +static DEFINE_MUTEX(ublk_ctl_mutex); + +static struct miscdevice ublk_misc; + +static struct ublk_device *ublk_get_device(struct ublk_device *ub) +{ + if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) + return ub; + return NULL; +} + +static void ublk_put_device(struct ublk_device *ub) +{ + put_device(&ub->cdev_dev); +} + +static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev, + int qid) +{ + return (struct ublk_queue *)&(dev->__queues[qid * dev->queue_size]); +} + +static inline bool ublk_rq_has_data(const struct request *rq) +{ + return rq->bio && bio_has_data(rq->bio); +} + +static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq, + int tag) +{ + return (struct ublksrv_io_desc *) + &(ubq->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); +} + +static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) +{ + return ublk_get_queue(ub, q_id)->io_cmd_buf; +} + +static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) +{ + struct ublk_queue *ubq = ublk_get_queue(ub, q_id); + + return round_up(ubq->q_depth * sizeof(struct ublksrv_io_desc), + PAGE_SIZE); +} + +static int ublk_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void ublk_release(struct gendisk *disk, fmode_t mode) +{ +} + +static const struct block_device_operations ub_fops = { + .owner = THIS_MODULE, + .open = ublk_open, + .release = ublk_release, +}; + +#define UBLK_MAX_PIN_PAGES 32 + +struct ublk_map_data { + const struct ublk_queue *ubq; + const struct request *rq; + const struct ublk_io *io; + unsigned max_bytes; +}; + +struct ublk_io_iter { + struct page *pages[UBLK_MAX_PIN_PAGES]; + unsigned pg_off; /* offset in the 1st page in pages */ + int nr_pages; /* how many page pointers in pages */ + struct bio *bio; + struct bvec_iter iter; +}; + +static inline unsigned ublk_copy_io_pages(struct ublk_io_iter *data, + unsigned max_bytes, bool to_vm) +{ + const unsigned total = 
min_t(unsigned, max_bytes, + PAGE_SIZE - data->pg_off + + ((data->nr_pages - 1) << PAGE_SHIFT)); + unsigned done = 0; + unsigned pg_idx = 0; + + while (done < total) { + struct bio_vec bv = bio_iter_iovec(data->bio, data->iter); + const unsigned int bytes = min3(bv.bv_len, total - done, + (unsigned)(PAGE_SIZE - data->pg_off)); + void *bv_buf = bvec_kmap_local(&bv); + void *pg_buf = kmap_local_page(data->pages[pg_idx]); + + if (to_vm) + memcpy(pg_buf + data->pg_off, bv_buf, bytes); + else + memcpy(bv_buf, pg_buf + data->pg_off, bytes); + + kunmap_local(pg_buf); + kunmap_local(bv_buf); + + /* advance page array */ + data->pg_off += bytes; + if (data->pg_off == PAGE_SIZE) { + pg_idx += 1; + data->pg_off = 0; + } + + done += bytes; + + /* advance bio */ + bio_advance_iter_single(data->bio, &data->iter, bytes); + if (!data->iter.bi_size) { + data->bio = data->bio->bi_next; + if (data->bio == NULL) + break; + data->iter = data->bio->bi_iter; + } + } + + return done; +} + +static inline int ublk_copy_user_pages(struct ublk_map_data *data, + bool to_vm) +{ + const unsigned int gup_flags = to_vm ? FOLL_WRITE : 0; + const unsigned long start_vm = data->io->addr; + unsigned int done = 0; + struct ublk_io_iter iter = { + .pg_off = start_vm & (PAGE_SIZE - 1), + .bio = data->rq->bio, + .iter = data->rq->bio->bi_iter, + }; + const unsigned int nr_pages = round_up(data->max_bytes + + (start_vm & (PAGE_SIZE - 1)), PAGE_SIZE) >> PAGE_SHIFT; + + while (done < nr_pages) { + const unsigned to_pin = min_t(unsigned, UBLK_MAX_PIN_PAGES, + nr_pages - done); + unsigned i, len; + + iter.nr_pages = get_user_pages_fast(start_vm + + (done << PAGE_SHIFT), to_pin, gup_flags, + iter.pages); + if (iter.nr_pages <= 0) + return done == 0 ? iter.nr_pages : done; + len = ublk_copy_io_pages(&iter, data->max_bytes, to_vm); + for (i = 0; i < iter.nr_pages; i++) { + if (to_vm) + set_page_dirty(iter.pages[i]); + put_page(iter.pages[i]); + } + data->max_bytes -= len; + done += iter.nr_pages; + } + + return done; +} + +static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req, + struct ublk_io *io) +{ + const unsigned int rq_bytes = blk_rq_bytes(req); + /* + * no zero copy, we delay copy WRITE request data into ublksrv + * context and the big benefit is that pinning pages in current + * context is pretty fast, see ublk_pin_user_pages + */ + if (req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_FLUSH) + return rq_bytes; + + if (ublk_rq_has_data(req)) { + struct ublk_map_data data = { + .ubq = ubq, + .rq = req, + .io = io, + .max_bytes = rq_bytes, + }; + + ublk_copy_user_pages(&data, true); + + return rq_bytes - data.max_bytes; + } + return rq_bytes; +} + +static int ublk_unmap_io(const struct ublk_queue *ubq, + const struct request *req, + struct ublk_io *io) +{ + const unsigned int rq_bytes = blk_rq_bytes(req); + + if (req_op(req) == REQ_OP_READ && ublk_rq_has_data(req)) { + struct ublk_map_data data = { + .ubq = ubq, + .rq = req, + .io = io, + .max_bytes = io->res, + }; + + WARN_ON_ONCE(io->res > rq_bytes); + + ublk_copy_user_pages(&data, false); + + return io->res - data.max_bytes; + } + return rq_bytes; +} + +static inline unsigned int ublk_req_build_flags(struct request *req) +{ + unsigned flags = 0; + + if (req->cmd_flags & REQ_FAILFAST_DEV) + flags |= UBLK_IO_F_FAILFAST_DEV; + + if (req->cmd_flags & REQ_FAILFAST_TRANSPORT) + flags |= UBLK_IO_F_FAILFAST_TRANSPORT; + + if (req->cmd_flags & REQ_FAILFAST_DRIVER) + flags |= UBLK_IO_F_FAILFAST_DRIVER; + + if (req->cmd_flags & REQ_META) + flags |= 
UBLK_IO_F_META; + + if (req->cmd_flags & REQ_INTEGRITY) + flags |= UBLK_IO_F_INTEGRITY; + + if (req->cmd_flags & REQ_FUA) + flags |= UBLK_IO_F_FUA; + + if (req->cmd_flags & REQ_PREFLUSH) + flags |= UBLK_IO_F_PREFLUSH; + + if (req->cmd_flags & REQ_NOUNMAP) + flags |= UBLK_IO_F_NOUNMAP; + + if (req->cmd_flags & REQ_SWAP) + flags |= UBLK_IO_F_SWAP; + + return flags; +} + +static int ublk_setup_iod(struct ublk_queue *ubq, struct request *req) +{ + struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); + struct ublk_io *io = &ubq->ios[req->tag]; + u32 ublk_op; + + switch (req_op(req)) { + case REQ_OP_READ: + ublk_op = UBLK_IO_OP_READ; + break; + case REQ_OP_WRITE: + ublk_op = UBLK_IO_OP_WRITE; + break; + case REQ_OP_FLUSH: + ublk_op = UBLK_IO_OP_FLUSH; + break; + case REQ_OP_DISCARD: + ublk_op = UBLK_IO_OP_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + ublk_op = UBLK_IO_OP_WRITE_ZEROES; + break; + default: + return BLK_STS_IOERR; + } + + /* need to translate since kernel may change */ + iod->op_flags = ublk_op | ublk_req_build_flags(req); + iod->nr_sectors = blk_rq_sectors(req); + iod->start_sector = blk_rq_pos(req); + iod->addr = io->addr; + + return BLK_STS_OK; +} + +static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu( + struct io_uring_cmd *ioucmd) +{ + return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu; +} + +static bool ubq_daemon_is_dying(struct ublk_queue *ubq) +{ + return ubq->ubq_daemon->flags & PF_EXITING; +} + +/* todo: handle partial completion */ +static void ublk_complete_rq(struct request *req) +{ + struct ublk_queue *ubq = req->mq_hctx->driver_data; + struct ublk_io *io = &ubq->ios[req->tag]; + unsigned int unmapped_bytes; + + /* failed read IO if nothing is read */ + if (!io->res && req_op(req) == REQ_OP_READ) + io->res = -EIO; + + if (io->res < 0) { + blk_mq_end_request(req, errno_to_blk_status(io->res)); + return; + } + + /* + * FLUSH or DISCARD usually won't return bytes returned, so end them + * directly. + * + * Both the two needn't unmap. + */ + if (req_op(req) != REQ_OP_READ && req_op(req) != REQ_OP_WRITE) { + blk_mq_end_request(req, BLK_STS_OK); + return; + } + + /* for READ request, writing data in iod->addr to rq buffers */ + unmapped_bytes = ublk_unmap_io(ubq, req, io); + + /* + * Extremely impossible since we got data filled in just before + * + * Re-read simply for this unlikely case. + */ + if (unlikely(unmapped_bytes < io->res)) + io->res = unmapped_bytes; + + if (blk_update_request(req, BLK_STS_OK, io->res)) + blk_mq_requeue_request(req, true); + else + __blk_mq_end_request(req, BLK_STS_OK); +} + +/* + * __ublk_fail_req() may be called from abort context or ->ubq_daemon + * context during exiting, so lock is required. + * + * Also aborting may not be started yet, keep in mind that one failed + * request may be issued by block layer again. 
+ */ +static void __ublk_fail_req(struct ublk_io *io, struct request *req) +{ + WARN_ON_ONCE(io->flags & UBLK_IO_FLAG_ACTIVE); + + if (!(io->flags & UBLK_IO_FLAG_ABORTED)) { + io->flags |= UBLK_IO_FLAG_ABORTED; + blk_mq_end_request(req, BLK_STS_IOERR); + } +} + +#define UBLK_REQUEUE_DELAY_MS 3 + +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + struct ublk_device *ub = cmd->file->private_data; + struct request *req = pdu->req; + struct ublk_queue *ubq = req->mq_hctx->driver_data; + int tag = req->tag; + struct ublk_io *io = &ubq->ios[tag]; + bool task_exiting = current != ubq->ubq_daemon || + (current->flags & PF_EXITING); + unsigned int mapped_bytes; + + pr_devel("%s: complete: op %d, qid %d tag %d io_flags %x addr %llx\n", + __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags, + ublk_get_iod(ubq, req->tag)->addr); + + if (unlikely(task_exiting)) { + blk_mq_end_request(req, BLK_STS_IOERR); + mod_delayed_work(system_wq, &ub->monitor_work, 0); + return; + } + + mapped_bytes = ublk_map_io(ubq, req, io); + + /* partially mapped, update io descriptor */ + if (unlikely(mapped_bytes != blk_rq_bytes(req))) { + /* + * Nothing mapped, retry until we succeed. + * + * We may never succeed in mapping any bytes here because + * of OOM. TODO: reserve one buffer with single page pinned + * for providing forward progress guarantee. + */ + if (unlikely(!mapped_bytes)) { + blk_mq_requeue_request(req, false); + blk_mq_delay_kick_requeue_list(req->q, + UBLK_REQUEUE_DELAY_MS); + return; + } + + ublk_get_iod(ubq, req->tag)->nr_sectors = + mapped_bytes >> 9; + } + + /* mark this cmd owned by ublksrv */ + io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; + + /* + * clear ACTIVE since we are done with this sqe/cmd slot + * We can only accept io cmd in case of being not active. 
+ */ + io->flags &= ~UBLK_IO_FLAG_ACTIVE; + + /* tell ublksrv one io request is coming */ + io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0); +} + +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct ublk_queue *ubq = hctx->driver_data; + struct request *rq = bd->rq; + struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + blk_status_t res; + + /* fill iod to slot in io cmd buffer */ + res = ublk_setup_iod(ubq, rq); + if (unlikely(res != BLK_STS_OK)) + return BLK_STS_IOERR; + + blk_mq_start_request(bd->rq); + + if (unlikely(ubq_daemon_is_dying(ubq))) { + mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0); + return BLK_STS_IOERR; + } + + pdu->req = rq; + io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + + return BLK_STS_OK; +} + + +static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, + unsigned int hctx_idx) +{ + struct ublk_device *ub = hctx->queue->queuedata; + struct ublk_queue *ubq = ublk_get_queue(ub, hctx->queue_num); + + hctx->driver_data = ubq; + return 0; +} + +static const struct blk_mq_ops ublk_mq_ops = { + .queue_rq = ublk_queue_rq, + .init_hctx = ublk_init_hctx, +}; + +static int ublk_ch_open(struct inode *inode, struct file *filp) +{ + struct ublk_device *ub = container_of(inode->i_cdev, + struct ublk_device, cdev); + + if (atomic_cmpxchg(&ub->ch_open_cnt, 0, 1) == 0) { + filp->private_data = ub; + return 0; + } + return -EBUSY; +} + +static int ublk_ch_release(struct inode *inode, struct file *filp) +{ + struct ublk_device *ub = filp->private_data; + + while (atomic_cmpxchg(&ub->ch_open_cnt, 1, 0) != 1) + cpu_relax(); + + filp->private_data = NULL; + return 0; +} + +/* map pre-allocated per-queue cmd buffer to ublksrv daemon */ +static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ublk_device *ub = filp->private_data; + size_t sz = vma->vm_end - vma->vm_start; + unsigned max_sz = UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc); + unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT; + int q_id, ret = 0; + + mutex_lock(&ub->mutex); + if (!ub->mm) + ub->mm = current->mm; + if (current->mm != ub->mm) + ret = -EINVAL; + mutex_unlock(&ub->mutex); + + if (ret) + return ret; + + if (vma->vm_flags & VM_WRITE) + return -EPERM; + + end = UBLKSRV_CMD_BUF_OFFSET + ub->dev_info.nr_hw_queues * max_sz; + if (phys_off < UBLKSRV_CMD_BUF_OFFSET || phys_off >= end) + return -EINVAL; + + q_id = (phys_off - UBLKSRV_CMD_BUF_OFFSET) / max_sz; + pr_devel("%s: qid %d, pid %d, addr %lx pg_off %lx sz %lu\n", + __func__, q_id, current->pid, vma->vm_start, + phys_off, (unsigned long)sz); + + if (sz != ublk_queue_cmd_buf_size(ub, q_id)) + return -EINVAL; + + pfn = virt_to_phys(ublk_queue_cmd_buf(ub, q_id)) >> PAGE_SHIFT; + return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); +} + +static void ublk_commit_completion(struct ublk_device *ub, + struct ublksrv_io_cmd *ub_cmd) +{ + u32 qid = ub_cmd->q_id, tag = ub_cmd->tag; + struct ublk_queue *ubq = ublk_get_queue(ub, qid); + struct ublk_io *io = &ubq->ios[tag]; + struct request *req; + + /* now this cmd slot is owned by nbd driver */ + io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV; + io->res = ub_cmd->result; + + /* find the io request and complete */ + req = blk_mq_tag_to_rq(ub->tag_set.tags[qid], tag); + + if (req && likely(!blk_should_fake_timeout(req->q))) + ublk_complete_rq(req); +} + +/* + * When ->ubq_daemon is exiting, either new request is 
ended immediately, + * or any queued io command is drained, so it is safe to abort queue + * lockless + */ +static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq) +{ + int i; + + if (!ublk_get_device(ub)) + return; + + for (i = 0; i < ubq->q_depth; i++) { + struct ublk_io *io = &ubq->ios[i]; + + if (!(io->flags & UBLK_IO_FLAG_ACTIVE)) { + struct request *rq; + + /* + * Either we fail the request or ublk_rq_task_work_fn + * will do it + */ + rq = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], i); + if (rq) + __ublk_fail_req(io, rq); + } + } + ublk_put_device(ub); +} + +static void ublk_daemon_monitor_work(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, monitor_work.work); + int i; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) { + struct ublk_queue *ubq = ublk_get_queue(ub, i); + + if (ubq_daemon_is_dying(ubq)) { + schedule_work(&ub->stop_work); + + /* abort queue is for making forward progress */ + ublk_abort_queue(ub, ubq); + } + } + + /* + * We can't schedule monitor work after ublk_remove() is started. + * + * No need ub->mutex, monitor work are canceled after state is marked + * as DEAD, so DEAD state is observed reliably. + */ + if (ub->dev_info.state != UBLK_S_DEV_DEAD) + schedule_delayed_work(&ub->monitor_work, + UBLK_DAEMON_MONITOR_PERIOD); +} + +static void ublk_cancel_queue(struct ublk_queue *ubq) +{ + int i; + + for (i = 0; i < ubq->q_depth; i++) { + struct ublk_io *io = &ubq->ios[i]; + + if (io->flags & UBLK_IO_FLAG_ACTIVE) + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0); + } +} + +/* Cancel all pending commands, must be called after del_gendisk() returns */ +static void ublk_cancel_dev(struct ublk_device *ub) +{ + int i; + + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) + ublk_cancel_queue(ublk_get_queue(ub, i)); +} + +static void ublk_stop_dev(struct ublk_device *ub) +{ + mutex_lock(&ub->mutex); + if (!disk_live(ub->ub_disk)) + goto unlock; + + del_gendisk(ub->ub_disk); + ub->dev_info.state = UBLK_S_DEV_DEAD; + ub->dev_info.ublksrv_pid = -1; + ublk_cancel_dev(ub); + unlock: + mutex_unlock(&ub->mutex); + cancel_delayed_work_sync(&ub->monitor_work); +} + +static int ublk_ctrl_stop_dev(struct ublk_device *ub) +{ + ublk_stop_dev(ub); + cancel_work_sync(&ub->stop_work); + return 0; +} + +static inline bool ublk_queue_ready(struct ublk_queue *ubq) +{ + return ubq->nr_io_ready == ubq->q_depth; +} + +/* device can only be started after all IOs are ready */ +static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) +{ + mutex_lock(&ub->mutex); + ubq->nr_io_ready++; + if (ublk_queue_ready(ubq)) { + ubq->ubq_daemon = current; + get_task_struct(ubq->ubq_daemon); + ub->nr_queues_ready++; + } + if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) + complete_all(&ub->completion); + mutex_unlock(&ub->mutex); +} + +static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd; + struct ublk_device *ub = cmd->file->private_data; + struct ublk_queue *ubq; + struct ublk_io *io; + u32 cmd_op = cmd->cmd_op; + unsigned tag = ub_cmd->tag; + int ret = -EINVAL; + + pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n", + __func__, cmd->cmd_op, ub_cmd->q_id, tag, + ub_cmd->result); + + if (!(issue_flags & IO_URING_F_SQE128)) + goto out; + + if (ub_cmd->q_id >= ub->dev_info.nr_hw_queues) + goto out; + + ubq = ublk_get_queue(ub, ub_cmd->q_id); + if (!ubq || ub_cmd->q_id != ubq->q_id) + goto out; + + if 
(ubq->ubq_daemon && ubq->ubq_daemon != current) + goto out; + + if (tag >= ubq->q_depth) + goto out; + + io = &ubq->ios[tag]; + + /* there is pending io cmd, something must be wrong */ + if (io->flags & UBLK_IO_FLAG_ACTIVE) { + ret = -EBUSY; + goto out; + } + + switch (cmd_op) { + case UBLK_IO_FETCH_REQ: + /* UBLK_IO_FETCH_REQ is only allowed before queue is setup */ + if (ublk_queue_ready(ubq)) { + ret = -EBUSY; + goto out; + } + /* + * The io is being handled by server, so COMMIT_RQ is expected + * instead of FETCH_REQ + */ + if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV) + goto out; + /* FETCH_RQ has to provide IO buffer */ + if (!ub_cmd->addr) + goto out; + io->cmd = cmd; + io->flags |= UBLK_IO_FLAG_ACTIVE; + io->addr = ub_cmd->addr; + + ublk_mark_io_ready(ub, ubq); + break; + case UBLK_IO_COMMIT_AND_FETCH_REQ: + /* FETCH_RQ has to provide IO buffer */ + if (!ub_cmd->addr) + goto out; + if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)) + goto out; + io->addr = ub_cmd->addr; + io->flags |= UBLK_IO_FLAG_ACTIVE; + io->cmd = cmd; + ublk_commit_completion(ub, ub_cmd); + break; + default: + goto out; + } + return -EIOCBQUEUED; + + out: + io->flags &= ~UBLK_IO_FLAG_ACTIVE; + io_uring_cmd_done(cmd, ret, 0); + pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", + __func__, cmd_op, tag, ret, io->flags); + return -EIOCBQUEUED; +} + +static const struct file_operations ublk_ch_fops = { + .owner = THIS_MODULE, + .open = ublk_ch_open, + .release = ublk_ch_release, + .llseek = no_llseek, + .uring_cmd = ublk_ch_uring_cmd, + .mmap = ublk_ch_mmap, +}; + +static void ublk_deinit_queue(struct ublk_device *ub, int q_id) +{ + int size = ublk_queue_cmd_buf_size(ub, q_id); + struct ublk_queue *ubq = ublk_get_queue(ub, q_id); + + if (ubq->ubq_daemon) + put_task_struct(ubq->ubq_daemon); + if (ubq->io_cmd_buf) + free_pages((unsigned long)ubq->io_cmd_buf, get_order(size)); +} + +static int ublk_init_queue(struct ublk_device *ub, int q_id) +{ + struct ublk_queue *ubq = ublk_get_queue(ub, q_id); + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO; + void *ptr; + int size; + + ubq->q_id = q_id; + ubq->q_depth = ub->dev_info.queue_depth; + size = ublk_queue_cmd_buf_size(ub, q_id); + + ptr = (void *) __get_free_pages(gfp_flags, get_order(size)); + if (!ptr) + return -ENOMEM; + + ubq->io_cmd_buf = ptr; + ubq->dev = ub; + return 0; +} + +static void ublk_deinit_queues(struct ublk_device *ub) +{ + int nr_queues = ub->dev_info.nr_hw_queues; + int i; + + if (!ub->__queues) + return; + + for (i = 0; i < nr_queues; i++) + ublk_deinit_queue(ub, i); + kfree(ub->__queues); +} + +static int ublk_init_queues(struct ublk_device *ub) +{ + int nr_queues = ub->dev_info.nr_hw_queues; + int depth = ub->dev_info.queue_depth; + int ubq_size = sizeof(struct ublk_queue) + depth * sizeof(struct ublk_io); + int i, ret = -ENOMEM; + + ub->queue_size = ubq_size; + ub->__queues = kcalloc(nr_queues, ubq_size, GFP_KERNEL); + if (!ub->__queues) + return ret; + + for (i = 0; i < nr_queues; i++) { + if (ublk_init_queue(ub, i)) + goto fail; + } + + init_completion(&ub->completion); + return 0; + + fail: + ublk_deinit_queues(ub); + return ret; +} + +static int __ublk_alloc_dev_number(struct ublk_device *ub, int idx) +{ + int i = idx; + int err; + + spin_lock(&ublk_idr_lock); + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (i >= 0) { + err = idr_alloc(&ublk_index_idr, ub, i, i + 1, GFP_NOWAIT); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&ublk_index_idr, ub, 0, 0, GFP_NOWAIT); + } + 
spin_unlock(&ublk_idr_lock); + + if (err >= 0) + ub->ub_number = err; + + return err; +} + +static struct ublk_device *__ublk_create_dev(int idx) +{ + struct ublk_device *ub = NULL; + int ret; + + ub = kzalloc(sizeof(*ub), GFP_KERNEL); + if (!ub) + return ERR_PTR(-ENOMEM); + + ret = __ublk_alloc_dev_number(ub, idx); + if (ret < 0) { + kfree(ub); + return ERR_PTR(ret); + } + return ub; +} + +static void __ublk_destroy_dev(struct ublk_device *ub) +{ + spin_lock(&ublk_idr_lock); + idr_remove(&ublk_index_idr, ub->ub_number); + wake_up_all(&ublk_idr_wq); + spin_unlock(&ublk_idr_lock); + + mutex_destroy(&ub->mutex); + + kfree(ub); +} + +static void ublk_cdev_rel(struct device *dev) +{ + struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev); + + put_disk(ub->ub_disk); + + blk_mq_free_tag_set(&ub->tag_set); + + ublk_deinit_queues(ub); + + __ublk_destroy_dev(ub); +} + +static int ublk_add_chdev(struct ublk_device *ub) +{ + struct device *dev = &ub->cdev_dev; + int minor = ub->ub_number; + int ret; + + dev->parent = ublk_misc.this_device; + dev->devt = MKDEV(MAJOR(ublk_chr_devt), minor); + dev->class = ublk_chr_class; + dev->release = ublk_cdev_rel; + device_initialize(dev); + + ret = dev_set_name(dev, "ublkc%d", minor); + if (ret) + goto fail; + + cdev_init(&ub->cdev, &ublk_ch_fops); + ret = cdev_device_add(&ub->cdev, dev); + if (ret) + goto fail; + return 0; + fail: + put_device(dev); + return ret; +} + +static void ublk_stop_work_fn(struct work_struct *work) +{ + struct ublk_device *ub = + container_of(work, struct ublk_device, stop_work); + + ublk_stop_dev(ub); +} + +static void ublk_update_capacity(struct ublk_device *ub) +{ + unsigned int max_rq_bytes; + + /* make max request buffer size aligned with PAGE_SIZE */ + max_rq_bytes = round_down(ub->dev_info.rq_max_blocks << + ub->bs_shift, PAGE_SIZE); + ub->dev_info.rq_max_blocks = max_rq_bytes >> ub->bs_shift; + + set_capacity(ub->ub_disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9)); +} + +/* add disk & cdev, cleanup everything in case of failure */ +static int ublk_add_dev(struct ublk_device *ub) +{ + struct gendisk *disk; + int err = -ENOMEM; + int bsize; + + /* We are not ready to support zero copy */ + ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; + + bsize = ub->dev_info.block_size; + ub->bs_shift = ilog2(bsize); + + ub->dev_info.nr_hw_queues = min_t(unsigned int, + ub->dev_info.nr_hw_queues, nr_cpu_ids); + + INIT_WORK(&ub->stop_work, ublk_stop_work_fn); + INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work); + + if (ublk_init_queues(ub)) + goto out_destroy_dev; + + ub->tag_set.ops = &ublk_mq_ops; + ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues; + ub->tag_set.queue_depth = ub->dev_info.queue_depth; + ub->tag_set.numa_node = NUMA_NO_NODE; + ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ub->tag_set.driver_data = ub; + + err = blk_mq_alloc_tag_set(&ub->tag_set); + if (err) + goto out_deinit_queues; + + disk = ub->ub_disk = blk_mq_alloc_disk(&ub->tag_set, ub); + if (IS_ERR(disk)) { + err = PTR_ERR(disk); + goto out_cleanup_tags; + } + ub->ub_queue = ub->ub_disk->queue; + + ub->ub_queue->queuedata = ub; + + blk_queue_logical_block_size(ub->ub_queue, bsize); + blk_queue_physical_block_size(ub->ub_queue, bsize); + blk_queue_io_min(ub->ub_queue, bsize); + + blk_queue_max_hw_sectors(ub->ub_queue, ub->dev_info.rq_max_blocks << + (ub->bs_shift - 9)); + + ub->ub_queue->limits.discard_granularity = PAGE_SIZE; + + blk_queue_max_discard_sectors(ub->ub_queue, UINT_MAX >> 9); + 
blk_queue_max_write_zeroes_sectors(ub->ub_queue, UINT_MAX >> 9); + + ublk_update_capacity(ub); + + disk->fops = &ub_fops; + disk->private_data = ub; + disk->queue = ub->ub_queue; + sprintf(disk->disk_name, "ublkb%d", ub->ub_number); + + mutex_init(&ub->mutex); + + /* add char dev so that ublksrv daemon can be setup */ + err = ublk_add_chdev(ub); + if (err) + return err; + + /* don't expose disk now until we got start command from cdev */ + + return 0; + +out_cleanup_tags: + blk_mq_free_tag_set(&ub->tag_set); +out_deinit_queues: + ublk_deinit_queues(ub); +out_destroy_dev: + __ublk_destroy_dev(ub); + return err; +} + +static void ublk_remove(struct ublk_device *ub) +{ + ublk_ctrl_stop_dev(ub); + + cdev_device_del(&ub->cdev, &ub->cdev_dev); + put_device(&ub->cdev_dev); +} + +static struct ublk_device *ublk_get_device_from_id(int idx) +{ + struct ublk_device *ub = NULL; + + if (idx < 0) + return NULL; + + spin_lock(&ublk_idr_lock); + ub = idr_find(&ublk_index_idr, idx); + if (ub) + ub = ublk_get_device(ub); + spin_unlock(&ublk_idr_lock); + + return ub; +} + +static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + int ret = -EINVAL; + int ublksrv_pid = (int)header->data[0]; + unsigned long dev_blocks = header->data[1]; + + if (ublksrv_pid <= 0) + return ret; + + wait_for_completion_interruptible(&ub->completion); + + schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); + + mutex_lock(&ub->mutex); + if (!disk_live(ub->ub_disk)) { + /* We may get disk size updated */ + if (dev_blocks) { + ub->dev_info.dev_blocks = dev_blocks; + ublk_update_capacity(ub); + } + ub->dev_info.ublksrv_pid = ublksrv_pid; + ret = add_disk(ub->ub_disk); + if (!ret) + ub->dev_info.state = UBLK_S_DEV_LIVE; + } else { + ret = -EEXIST; + } + mutex_unlock(&ub->mutex); + + return ret; +} + +static struct blk_mq_hw_ctx *ublk_get_hw_queue(struct ublk_device *ub, + unsigned int index) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + queue_for_each_hw_ctx(ub->ub_queue, hctx, i) + if (hctx->queue_num == index) + return hctx; + return NULL; +} + +static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + void __user *argp = (void __user *)(unsigned long)header->addr; + struct blk_mq_hw_ctx *hctx; + struct ublk_device *ub; + unsigned long queue; + unsigned int retlen; + int ret; + + ub = ublk_get_device_from_id(header->dev_id); + if (!ub) + goto out; + + ret = -EINVAL; + queue = header->data[0]; + if (queue >= ub->dev_info.nr_hw_queues) + goto out; + hctx = ublk_get_hw_queue(ub, queue); + if (!hctx) + goto out; + + retlen = min_t(unsigned short, header->len, cpumask_size()); + if (copy_to_user(argp, hctx->cpumask, retlen)) { + ret = -EFAULT; + goto out; + } + if (retlen != header->len) { + if (clear_user(argp + retlen, header->len - retlen)) { + ret = -EFAULT; + goto out; + } + } + ret = 0; + out: + if (ub) + ublk_put_device(ub); + return ret; +} + +static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_dev_info *info, + void __user *argp, int idx) +{ + struct ublk_device *ub; + int ret; + + ret = mutex_lock_killable(&ublk_ctl_mutex); + if (ret) + return ret; + + ub = __ublk_create_dev(idx); + if (!IS_ERR_OR_NULL(ub)) { + memcpy(&ub->dev_info, info, sizeof(*info)); + + /* update device id */ + ub->dev_info.dev_id = ub->ub_number; + + ret = ublk_add_dev(ub); + if (!ret) { + if (copy_to_user(argp, &ub->dev_info, sizeof(*info))) 
{ + ublk_remove(ub); + ret = -EFAULT; + } + } + } else { + if (IS_ERR(ub)) + ret = PTR_ERR(ub); + else + ret = -ENOMEM; + } + mutex_unlock(&ublk_ctl_mutex); + + return ret; +} + +static inline bool ublk_idr_freed(int id) +{ + void *ptr; + + spin_lock(&ublk_idr_lock); + ptr = idr_find(&ublk_index_idr, id); + spin_unlock(&ublk_idr_lock); + + return ptr == NULL; +} + +static int ublk_ctrl_del_dev(int idx) +{ + struct ublk_device *ub; + int ret; + + ret = mutex_lock_killable(&ublk_ctl_mutex); + if (ret) + return ret; + + ub = ublk_get_device_from_id(idx); + if (ub) { + ublk_remove(ub); + ublk_put_device(ub); + ret = 0; + } else { + ret = -ENODEV; + } + + /* + * Wait until the idr is removed, then it can be reused after + * DEL_DEV command is returned. + */ + if (!ret) + wait_event(ublk_idr_wq, ublk_idr_freed(idx)); + mutex_unlock(&ublk_ctl_mutex); + + return ret; +} + + +static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) +{ + pr_devel("%s: dev id %d flags %llx\n", __func__, + info->dev_id, info->flags[0]); + pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", + info->nr_hw_queues, info->queue_depth, + info->block_size, info->dev_blocks); +} + +static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + + pr_devel("%s: cmd_op %x, dev id %d qid %d data %llx buf %llx len %u\n", + __func__, cmd->cmd_op, header->dev_id, header->queue_id, + header->data[0], header->addr, header->len); +} + +static int ublk_ctrl_cmd_validate(struct io_uring_cmd *cmd, + struct ublksrv_ctrl_dev_info *info) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + u32 cmd_op = cmd->cmd_op; + void __user *argp = (void __user *)(unsigned long)header->addr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd_op) { + case UBLK_CMD_GET_DEV_INFO: + if (header->len < sizeof(*info) || !header->addr) + return -EINVAL; + break; + case UBLK_CMD_ADD_DEV: + if (header->len < sizeof(*info) || !header->addr) + return -EINVAL; + if (copy_from_user(info, argp, sizeof(*info)) != 0) + return -EFAULT; + ublk_dump_dev_info(info); + if (header->dev_id != info->dev_id) { + printk(KERN_WARNING "%s: cmd %x, dev id not match %u %u\n", + __func__, cmd_op, header->dev_id, + info->dev_id); + return -EINVAL; + } + if (header->queue_id != (u16)-1) { + printk(KERN_WARNING "%s: cmd %x queue_id is wrong %x\n", + __func__, cmd_op, header->queue_id); + return -EINVAL; + } + break; + case UBLK_CMD_GET_QUEUE_AFFINITY: + if ((header->len * BITS_PER_BYTE) < nr_cpu_ids) + return -EINVAL; + if (header->len & (sizeof(unsigned long)-1)) + return -EINVAL; + if (!header->addr) + return -EINVAL; + }; + + return 0; +} + +static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, + unsigned int issue_flags) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + void __user *argp = (void __user *)(unsigned long)header->addr; + struct ublksrv_ctrl_dev_info info; + u32 cmd_op = cmd->cmd_op; + struct ublk_device *ub; + int ret = -EINVAL; + + ublk_ctrl_cmd_dump(cmd); + + if (!(issue_flags & IO_URING_F_SQE128)) + goto out; + + ret = ublk_ctrl_cmd_validate(cmd, &info); + if (ret) + goto out; + + ret = -ENODEV; + switch (cmd_op) { + case UBLK_CMD_START_DEV: + ub = ublk_get_device_from_id(header->dev_id); + if (ub) { + ret = ublk_ctrl_start_dev(ub, cmd); + ublk_put_device(ub); + } + break; + case UBLK_CMD_STOP_DEV: + ub = ublk_get_device_from_id(header->dev_id); + if (ub) { + ret = 
ublk_ctrl_stop_dev(ub); + ublk_put_device(ub); + } + break; + case UBLK_CMD_GET_DEV_INFO: + ub = ublk_get_device_from_id(header->dev_id); + if (ub) { + if (copy_to_user(argp, &ub->dev_info, sizeof(info))) + ret = -EFAULT; + else + ret = 0; + ublk_put_device(ub); + } + break; + case UBLK_CMD_ADD_DEV: + ret = ublk_ctrl_add_dev(&info, argp, header->dev_id); + break; + case UBLK_CMD_DEL_DEV: + ret = ublk_ctrl_del_dev(header->dev_id); + break; + case UBLK_CMD_GET_QUEUE_AFFINITY: + ret = ublk_ctrl_get_queue_affinity(cmd); + break; + default: + break; + }; + out: + io_uring_cmd_done(cmd, ret, 0); + pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", + __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); + return -EIOCBQUEUED; +} + +static const struct file_operations ublk_ctl_fops = { + .open = nonseekable_open, + .uring_cmd = ublk_ctrl_uring_cmd, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice ublk_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "ublk-control", + .fops = &ublk_ctl_fops, +}; + +static int __init ublk_init(void) +{ + int ret; + + init_waitqueue_head(&ublk_idr_wq); + + ret = misc_register(&ublk_misc); + if (ret) + return ret; + + ret = alloc_chrdev_region(&ublk_chr_devt, 0, UBLK_MINORS, "ublk-char"); + if (ret) + goto unregister_mis; + + ublk_chr_class = class_create(THIS_MODULE, "ublk-char"); + if (IS_ERR(ublk_chr_class)) { + ret = PTR_ERR(ublk_chr_class); + goto free_chrdev_region; + } + return 0; + +free_chrdev_region: + unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS); +unregister_mis: + misc_deregister(&ublk_misc); + return ret; +} + +static void __exit ublk_exit(void) +{ + struct ublk_device *ub; + int id; + + class_destroy(ublk_chr_class); + + misc_deregister(&ublk_misc); + + idr_for_each_entry(&ublk_index_idr, ub, id) + ublk_remove(ub); + + idr_destroy(&ublk_index_idr); + unregister_chrdev_region(ublk_chr_devt, UBLK_MINORS); +} + +module_init(ublk_init); +module_exit(ublk_exit); + +MODULE_AUTHOR("Ming Lei "); +MODULE_LICENSE("GPL"); diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h new file mode 100644 index 000000000000..4f0c16ec875e --- /dev/null +++ b/include/uapi/linux/ublk_cmd.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef USER_BLK_DRV_CMD_INC_H +#define USER_BLK_DRV_CMD_INC_H + +#include + +/* ublk server command definition */ + +/* + * Admin commands, issued by ublk server, and handled by ublk driver. + */ +#define UBLK_CMD_GET_QUEUE_AFFINITY 0x01 +#define UBLK_CMD_GET_DEV_INFO 0x02 +#define UBLK_CMD_ADD_DEV 0x04 +#define UBLK_CMD_DEL_DEV 0x05 +#define UBLK_CMD_START_DEV 0x06 +#define UBLK_CMD_STOP_DEV 0x07 + +/* + * IO commands, issued by ublk server, and handled by ublk driver. + * + * FETCH_REQ: issued via sqe(URING_CMD) beforehand for fetching IO request + * from ublk driver, should be issued only when starting device. After + * the associated cqe is returned, request's tag can be retrieved via + * cqe->userdata. + * + * COMMIT_AND_FETCH_REQ: issued via sqe(URING_CMD) after ublkserver handled + * this IO request, request's handling result is committed to ublk + * driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be + * handled before completing io request. 
+ */ +#define UBLK_IO_FETCH_REQ 0x20 +#define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21 + +/* only ABORT means that no re-fetch */ +#define UBLK_IO_RES_OK 0 +#define UBLK_IO_RES_ABORT (-ENODEV) + +#define UBLKSRV_CMD_BUF_OFFSET 0 +#define UBLKSRV_IO_BUF_OFFSET 0x80000000 + +/* tag bit is 12bit, so at most 4096 IOs for each queue */ +#define UBLK_MAX_QUEUE_DEPTH 4096 + +/* + * zero copy requires 4k block size, and can remap ublk driver's io + * request into ublksrv's vm space + */ +#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) + +/* device state */ +#define UBLK_S_DEV_DEAD 0 +#define UBLK_S_DEV_LIVE 1 + +/* shipped via sqe->cmd of io_uring command */ +struct ublksrv_ctrl_cmd { + /* sent to which device, must be valid */ + __u32 dev_id; + + /* sent to which queue, must be -1 if the cmd isn't for queue */ + __u16 queue_id; + /* + * cmd specific buffer, can be IN or OUT. + */ + __u16 len; + __u64 addr; + + /* inline data */ + __u64 data[2]; +}; + +struct ublksrv_ctrl_dev_info { + __u16 nr_hw_queues; + __u16 queue_depth; + __u16 block_size; + __u16 state; + + __u32 rq_max_blocks; + __u32 dev_id; + + __u64 dev_blocks; + + __s32 ublksrv_pid; + __s32 reserved0; + __u64 flags[2]; + + /* For ublksrv internal use, invisible to ublk driver */ + __u64 ublksrv_flags; + __u64 reserved1[9]; +}; + +#define UBLK_IO_OP_READ 0 +#define UBLK_IO_OP_WRITE 1 +#define UBLK_IO_OP_FLUSH 2 +#define UBLK_IO_OP_DISCARD 3 +#define UBLK_IO_OP_WRITE_SAME 4 +#define UBLK_IO_OP_WRITE_ZEROES 5 + +#define UBLK_IO_F_FAILFAST_DEV (1U << 8) +#define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) +#define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) +#define UBLK_IO_F_META (1U << 11) +#define UBLK_IO_F_INTEGRITY (1U << 12) +#define UBLK_IO_F_FUA (1U << 13) +#define UBLK_IO_F_PREFLUSH (1U << 14) +#define UBLK_IO_F_NOUNMAP (1U << 15) +#define UBLK_IO_F_SWAP (1U << 16) + +/* + * io cmd is described by this structure, and stored in share memory, indexed + * by request tag. + * + * The data is stored by ublk driver, and read by ublksrv after one fetch command + * returns. + */ +struct ublksrv_io_desc { + /* op: bit 0-7, flags: bit 8-31 */ + __u32 op_flags; + + __u32 nr_sectors; + + /* start sector for this io */ + __u64 start_sector; + + /* buffer address in ublksrv daemon vm space, from ublk driver */ + __u64 addr; +}; + +static inline __u8 ublksrv_get_op(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags & 0xff; +} + +static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod) +{ + return iod->op_flags >> 8; +} + +/* issued to ublk driver via /dev/ublkcN */ +struct ublksrv_io_cmd { + __u16 q_id; + + /* for fetch/commit which result */ + __u16 tag; + + /* io result, it is valid for COMMIT* command only */ + __s32 result; + + /* + * userspace buffer address in ublksrv daemon process, valid for + * FETCH* command only + */ + __u64 addr; +}; + +#endif From 0edb3696c1713c42f52acbd8355b545e58f782b1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 13 Jul 2022 22:07:11 +0800 Subject: [PATCH 081/178] ublk_drv: support to complete io command via task_work_add Use task_work_add if it is available, since task_work_add can bring up better performance, especially batching signaling ->ubq_daemon can be done. It is observed that task_work_add() can boost iops by +4% on random 4k io test. Also except for completing io command, all other code paths are same with completing io command via io_uring_cmd_complete_in_task. Meantime add one flag of UBLK_F_URING_CMD_COMP_IN_TASK for comparing the mode easily. 
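Condensed, the submission-side choice this adds to ublk_queue_rq() looks like the sketch below; it is simplified (the real function's requeue and daemon-dying handling is omitted) and the wrapper name is made up:

	/*
	 * Sketch of the new dispatch step: batch task_work notifications
	 * by only signaling the daemon for the last request of a batch.
	 */
	static blk_status_t ublk_dispatch_sketch(struct ublk_queue *ubq,
						 struct request *rq, bool last)
	{
		if (ublk_can_use_task_work(ubq)) {
			struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
			enum task_work_notify_mode mode =
				last ? TWA_SIGNAL_NO_IPI : TWA_NONE;

			/* fails only if the daemon is already exiting */
			if (task_work_add(ubq->ubq_daemon, &data->work, mode))
				return BLK_STS_IOERR;
		} else {
			struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd;

			ublk_get_uring_cmd_pdu(cmd)->req = rq;
			io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
		}
		return BLK_STS_OK;
	}

TWA_NONE queues the work without waking the daemon; the last request of a batch uses TWA_SIGNAL_NO_IPI, and the new ->commit_rqs() callback covers the case where no request was marked last.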
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220713140711.97356-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 75 +++++++++++++++++++++++++++++++---- include/uapi/linux/ublk_cmd.h | 6 +++ 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 922a84c86fc6..35fa06ee70ff 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -41,10 +41,15 @@ #include #include #include +#include #include #define UBLK_MINORS (1U << MINORBITS) +struct ublk_rq_data { + struct callback_head work; +}; + struct ublk_uring_cmd_pdu { struct request *req; }; @@ -91,6 +96,7 @@ struct ublk_queue { int q_id; int q_depth; + unsigned long flags; struct task_struct *ubq_daemon; char *io_cmd_buf; @@ -149,6 +155,14 @@ static DEFINE_MUTEX(ublk_ctl_mutex); static struct miscdevice ublk_misc; +static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) +{ + if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && + !(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK)) + return true; + return false; +} + static struct ublk_device *ublk_get_device(struct ublk_device *ub) { if (kobject_get_unless_zero(&ub->cdev_dev.kobj)) @@ -500,12 +514,10 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req) #define UBLK_REQUEUE_DELAY_MS 3 -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +static inline void __ublk_rq_task_work(struct request *req) { - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); - struct ublk_device *ub = cmd->file->private_data; - struct request *req = pdu->req; struct ublk_queue *ubq = req->mq_hctx->driver_data; + struct ublk_device *ub = ubq->dev; int tag = req->tag; struct ublk_io *io = &ubq->ios[tag]; bool task_exiting = current != ubq->ubq_daemon || @@ -557,13 +569,27 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0); } +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +{ + struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + + __ublk_rq_task_work(pdu->req); +} + +static void ublk_rq_task_work_fn(struct callback_head *work) +{ + struct ublk_rq_data *data = container_of(work, + struct ublk_rq_data, work); + struct request *req = blk_mq_rq_from_pdu(data); + + __ublk_rq_task_work(req); +} + static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct ublk_queue *ubq = hctx->driver_data; struct request *rq = bd->rq; - struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; - struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); blk_status_t res; /* fill iod to slot in io cmd buffer */ @@ -574,16 +600,36 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (unlikely(ubq_daemon_is_dying(ubq))) { + fail: mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0); return BLK_STS_IOERR; } - pdu->req = rq; - io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb); + if (ublk_can_use_task_work(ubq)) { + struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq); + enum task_work_notify_mode notify_mode = bd->last ? 
+			TWA_SIGNAL_NO_IPI : TWA_NONE;
+
+		if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
+			goto fail;
+	} else {
+		struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd;
+		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+		pdu->req = rq;
+		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+	}
 
 	return BLK_STS_OK;
 }
 
+static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+	struct ublk_queue *ubq = hctx->driver_data;
+
+	if (ublk_can_use_task_work(ubq))
+		__set_notify_signal(ubq->ubq_daemon);
+}
 
 static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
 		unsigned int hctx_idx)
@@ -595,9 +641,20 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
 	return 0;
 }
 
+static int ublk_init_rq(struct blk_mq_tag_set *set, struct request *req,
+		unsigned int hctx_idx, unsigned int numa_node)
+{
+	struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+	init_task_work(&data->work, ublk_rq_task_work_fn);
+	return 0;
+}
+
 static const struct blk_mq_ops ublk_mq_ops = {
 	.queue_rq	= ublk_queue_rq,
+	.commit_rqs	= ublk_commit_rqs,
 	.init_hctx	= ublk_init_hctx,
+	.init_request	= ublk_init_rq,
 };
 
 static int ublk_ch_open(struct inode *inode, struct file *filp)
@@ -912,6 +969,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
 	void *ptr;
 	int size;
 
+	ubq->flags = ub->dev_info.flags[0];
 	ubq->q_id = q_id;
 	ubq->q_depth = ub->dev_info.queue_depth;
 	size = ublk_queue_cmd_buf_size(ub, q_id);
@@ -1099,6 +1157,7 @@ static int ublk_add_dev(struct ublk_device *ub)
 	ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues;
 	ub->tag_set.queue_depth = ub->dev_info.queue_depth;
 	ub->tag_set.numa_node = NUMA_NO_NODE;
+	ub->tag_set.cmd_size = sizeof(struct ublk_rq_data);
 	ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	ub->tag_set.driver_data = ub;
 
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 4f0c16ec875e..a3f5e7c21807 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -48,6 +48,12 @@
  */
 #define UBLK_F_SUPPORT_ZERO_COPY	(1UL << 0)
 
+/*
+ * Force to complete io cmd via io_uring_cmd_complete_in_task so that
+ * performance comparison against the task_work_add mode is done easily
+ */
+#define UBLK_F_URING_CMD_COMP_IN_TASK	(1UL << 1)
+
 /* device state */
 #define UBLK_S_DEV_DEAD	0
 #define UBLK_S_DEV_LIVE	1

From cebbe577cb17ed9b04b50d9e6802a8bacffbadca Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 14 Jul 2022 18:32:01 +0800
Subject: [PATCH 082/178] ublk_drv: fix request queue leak

Call blk_cleanup_queue() in the release code path to fix a request
queue leak.

Also for-5.20/block has cleaned up blk_cleanup_queue(), which is
basically merged into del_gendisk() when blk_mq_alloc_disk() is used for
allocating disk and queue. However, ublk may not add the disk in case of
a start-device failure, so del_gendisk() won't be called when removing
the ublk device, blk_mq_exit_queue() will not be called either, and it
can be a bit hard to deal with this kind of merge conflict.

Turns out ublk's queue/disk use model is very similar to scsi's, so
switch to scsi's model by allocating disk and queue independently; then
the v5.20 merge conflict is easy to handle by replacing
blk_cleanup_queue with blk_mq_destroy_queue.
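The resulting allocate/release pairing, condensed from the diff below (a sketch, not the verbatim functions):

	/* allocation: request queue first, then a gendisk wrapping it */
	ub->ub_queue = blk_mq_init_queue(&ub->tag_set);
	if (IS_ERR(ub->ub_queue))
		goto out_cleanup_tags;

	disk = blk_mq_alloc_disk_for_queue(ub->ub_queue,
					   &ublk_bio_compl_lkclass);
	if (IS_ERR(disk))
		goto out_free_request_queue;	/* blk_mq_destroy_queue() */

	/* release (ublk_cdev_rel): safe even if add_disk() never ran */
	blk_mq_destroy_queue(ub->ub_queue);
	put_disk(ub->ub_disk);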
Reported-by: Jens Axboe Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220714103201.131648-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 35fa06ee70ff..f10c4319dc1f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -155,6 +155,8 @@ static DEFINE_MUTEX(ublk_ctl_mutex); static struct miscdevice ublk_misc; +static struct lock_class_key ublk_bio_compl_lkclass; + static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) { if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && @@ -634,7 +636,7 @@ static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx) static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data, unsigned int hctx_idx) { - struct ublk_device *ub = hctx->queue->queuedata; + struct ublk_device *ub = driver_data; struct ublk_queue *ubq = ublk_get_queue(ub, hctx->queue_num); hctx->driver_data = ubq; @@ -1076,6 +1078,8 @@ static void ublk_cdev_rel(struct device *dev) { struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev); + blk_mq_destroy_queue(ub->ub_queue); + put_disk(ub->ub_disk); blk_mq_free_tag_set(&ub->tag_set); @@ -1165,14 +1169,17 @@ static int ublk_add_dev(struct ublk_device *ub) if (err) goto out_deinit_queues; - disk = ub->ub_disk = blk_mq_alloc_disk(&ub->tag_set, ub); + ub->ub_queue = blk_mq_init_queue(&ub->tag_set); + if (IS_ERR(ub->ub_queue)) + goto out_cleanup_tags; + ub->ub_queue->queuedata = ub; + + disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue, + &ublk_bio_compl_lkclass); if (IS_ERR(disk)) { err = PTR_ERR(disk); - goto out_cleanup_tags; + goto out_free_request_queue; } - ub->ub_queue = ub->ub_disk->queue; - - ub->ub_queue->queuedata = ub; blk_queue_logical_block_size(ub->ub_queue, bsize); blk_queue_physical_block_size(ub->ub_queue, bsize); @@ -1204,6 +1211,8 @@ static int ublk_add_dev(struct ublk_device *ub) return 0; +out_free_request_queue: + blk_mq_destroy_queue(ub->ub_queue); out_cleanup_tags: blk_mq_free_tag_set(&ub->tag_set); out_deinit_queues: From 5bf83e9a14ddae994d783dee96b91bf46f04839c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:09 +0200 Subject: [PATCH 083/178] block: stop using bdevname in bdev_write_inode Just use the %pg format specifier instead. Also reformat the printk statement to be more readable. 
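The conversion pattern, shown here on a made-up message rather than the bdev_write_inode() call site below:

	/* before: every call site needs a BDEVNAME_SIZE scratch buffer */
	char name[BDEVNAME_SIZE];

	pr_warn("I/O error on %s\n", bdevname(bdev, name));

	/* after: vsprintf formats the name straight from the block_device */
	pr_warn("I/O error on %pg\n", bdev);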
Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-2-hch@lst.de Signed-off-by: Jens Axboe --- block/bdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index 5fe06c1f2def..ce05175e71ce 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -54,12 +54,10 @@ static void bdev_write_inode(struct block_device *bdev) while (inode->i_state & I_DIRTY) { spin_unlock(&inode->i_lock); ret = write_inode_now(inode, true); - if (ret) { - char name[BDEVNAME_SIZE]; - pr_warn_ratelimited("VFS: Dirty inode writeback failed " - "for block device %s (err=%d).\n", - bdevname(bdev, name), ret); - } + if (ret) + pr_warn_ratelimited( + "VFS: Dirty inode writeback failed for block device %pg (err=%d).\n", + bdev, ret); spin_lock(&inode->i_lock); } spin_unlock(&inode->i_lock); From 02ff3dd20f512cf811ae8028c44fdb212b5f2bf7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:10 +0200 Subject: [PATCH 084/178] block: stop using bdevname in __blkdev_issue_discard Just use the %pg format specifier instead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-3-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-lib.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/block/blk-lib.c b/block/blk-lib.c index 09b7e1200c0f..67e6dbc1ae81 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -48,10 +48,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* In case the discard granularity isn't set by buggy device driver */ if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) { - char dev_name[BDEVNAME_SIZE]; - - bdevname(bdev, dev_name); - pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name); + pr_err_ratelimited("%pg: Error: discard_granularity is 0.\n", + bdev); return -EOPNOTSUPP; } From 1b70ccecaed4c3c50239e8409156fb447f965554 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:11 +0200 Subject: [PATCH 085/178] drbd: stop using bdevname in drbd_report_io_error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just use the %pg format specifier instead. Signed-off-by: Christoph Hellwig Reviewed-by: Christoph Böhmwalder Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index e64bcfba30ef..6d8dd14458c6 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -523,16 +523,14 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) { - char b[BDEVNAME_SIZE]; - if (!__ratelimit(&drbd_ratelimit_state)) return; - drbd_warn(device, "local %s IO error sector %llu+%u on %s\n", + drbd_warn(device, "local %s IO error sector %llu+%u on %pg\n", (req->rq_state & RQ_WRITE) ? "WRITE" : "READ", (unsigned long long)req->i.sector, req->i.size >> 9, - bdevname(device->ldev->backing_bdev, b)); + device->ldev->backing_bdev); } /* Helper for HANDED_OVER_TO_NETWORK. 
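The next two pktcdvd patches repeat the same conversion. Note that %pg prints the partition name (e.g. "sda1") when the block_device refers to a partition, matching what bdevname() produced, so the output does not change. In a seq_file sink the idiom is identical; a small sketch with a hypothetical helper name:

	/* hypothetical helper; mirrors the pkt_seq_show() change below */
	static void show_writer_mapping(struct seq_file *m, const char *writer,
					struct block_device *bdev)
	{
		seq_printf(m, "Writer %s mapped to %pg:\n", writer, bdev);
	}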
From fa070a3b50a17506a230e72bd48dba89e7bb5fea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:12 +0200 Subject: [PATCH 086/178] pktcdvd: stop using bdevname in pkt_seq_show Just use the %pg format specifier instead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 653d24231483..a7016ffce9a4 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2460,11 +2460,9 @@ static int pkt_seq_show(struct seq_file *m, void *p) { struct pktcdvd_device *pd = m->private; char *msg; - char bdev_buf[BDEVNAME_SIZE]; int states[PACKET_NUM_STATES]; - seq_printf(m, "Writer %s mapped to %s:\n", pd->name, - bdevname(pd->bdev, bdev_buf)); + seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev); seq_printf(m, "\nSettings:\n"); seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); From beecf70ee84363e92f3bf783b74da5f26e765d8d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:13 +0200 Subject: [PATCH 087/178] pktcdvd: stop using bdevname in pkt_new_dev Just use the %pg format specifier instead. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index a7016ffce9a4..01a15dbd9cde 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2519,7 +2519,6 @@ static int pkt_seq_show(struct seq_file *m, void *p) static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) { int i; - char b[BDEVNAME_SIZE]; struct block_device *bdev; struct scsi_device *sdev; @@ -2532,8 +2531,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) if (!pd2) continue; if (pd2->bdev->bd_dev == dev) { - pkt_err(pd, "%s already setup\n", - bdevname(pd2->bdev, b)); + pkt_err(pd, "%pg already setup\n", pd2->bdev); return -EBUSY; } if (pd2->pkt_dev == dev) { @@ -2568,7 +2566,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) } proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd); - pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b)); + pkt_dbg(1, pd, "writer mapped to %pg\n", bdev); return 0; out_mem: From 6e880cf59932a14bca128fc8e8faae0554932942 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:14 +0200 Subject: [PATCH 088/178] rnbd-srv: remove the name field from struct rnbd_dev Just print the block device name directly using the %pg format specifier. 
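Worth noting for the rest of this patch (a sketch, assuming both call paths end up in vsnprintf(), which implements %pg): the specifier is not limited to printk(), so the cached name field can be dropped and the name derived on demand:

	/* build the server-side id string straight from the bdev */
	snprintf(dev->id, sizeof(dev->id), "%pg", bdev);

	/* name the sysfs kobject from the bdev as well; error handling elided */
	int ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype,
				       rnbd_devs_kobj, "%pg", bdev);

Both forms appear in the hunks below.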
Signed-off-by: Christoph Hellwig Acked-by: Jack Wang Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-srv-dev.c | 1 - drivers/block/rnbd/rnbd-srv-dev.h | 1 - drivers/block/rnbd/rnbd-srv-sysfs.c | 5 ++--- drivers/block/rnbd/rnbd-srv.c | 9 ++++----- drivers/block/rnbd/rnbd-srv.h | 3 +-- 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv-dev.c b/drivers/block/rnbd/rnbd-srv-dev.c index c5d0a0391165..c63017f6e421 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.c +++ b/drivers/block/rnbd/rnbd-srv-dev.c @@ -28,7 +28,6 @@ struct rnbd_dev *rnbd_dev_open(const char *path, fmode_t flags) goto err; dev->blk_open_flags = flags; - bdevname(dev->bdev, dev->name); return dev; diff --git a/drivers/block/rnbd/rnbd-srv-dev.h b/drivers/block/rnbd/rnbd-srv-dev.h index 4309e5252469..8407d12f70af 100644 --- a/drivers/block/rnbd/rnbd-srv-dev.h +++ b/drivers/block/rnbd/rnbd-srv-dev.h @@ -15,7 +15,6 @@ struct rnbd_dev { struct block_device *bdev; fmode_t blk_open_flags; - char name[BDEVNAME_SIZE]; }; /** diff --git a/drivers/block/rnbd/rnbd-srv-sysfs.c b/drivers/block/rnbd/rnbd-srv-sysfs.c index feaa76c5a342..297a6924ff4e 100644 --- a/drivers/block/rnbd/rnbd-srv-sysfs.c +++ b/drivers/block/rnbd/rnbd-srv-sysfs.c @@ -38,14 +38,13 @@ static struct kobj_type dev_ktype = { }; int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, - struct block_device *bdev, - const char *dev_name) + struct block_device *bdev) { struct kobject *bdev_kobj; int ret; ret = kobject_init_and_add(&dev->dev_kobj, &dev_ktype, - rnbd_devs_kobj, dev_name); + rnbd_devs_kobj, "%pg", bdev); if (ret) { kobject_put(&dev->dev_kobj); return ret; diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index beaef43a67b9..0713014bf423 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -419,7 +419,7 @@ static struct rnbd_srv_sess_dev return sess_dev; } -static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id) +static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(struct block_device *bdev) { struct rnbd_srv_dev *dev; @@ -427,7 +427,7 @@ static struct rnbd_srv_dev *rnbd_srv_init_srv_dev(const char *id) if (!dev) return ERR_PTR(-ENOMEM); - strscpy(dev->id, id, sizeof(dev->id)); + snprintf(dev->id, sizeof(dev->id), "%pg", bdev); kref_init(&dev->kref); INIT_LIST_HEAD(&dev->sess_dev_list); mutex_init(&dev->lock); @@ -512,7 +512,7 @@ rnbd_srv_get_or_create_srv_dev(struct rnbd_dev *rnbd_dev, int ret; struct rnbd_srv_dev *new_dev, *dev; - new_dev = rnbd_srv_init_srv_dev(rnbd_dev->name); + new_dev = rnbd_srv_init_srv_dev(rnbd_dev->bdev); if (IS_ERR(new_dev)) return new_dev; @@ -758,8 +758,7 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess, */ mutex_lock(&srv_dev->lock); if (!srv_dev->dev_kobj.state_in_sysfs) { - ret = rnbd_srv_create_dev_sysfs(srv_dev, rnbd_dev->bdev, - rnbd_dev->name); + ret = rnbd_srv_create_dev_sysfs(srv_dev, rnbd_dev->bdev); if (ret) { mutex_unlock(&srv_dev->lock); rnbd_srv_err(srv_sess_dev, diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h index be2ae486d407..6926f9069dc4 100644 --- a/drivers/block/rnbd/rnbd-srv.h +++ b/drivers/block/rnbd/rnbd-srv.h @@ -68,8 +68,7 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev, /* rnbd-srv-sysfs.c */ int rnbd_srv_create_dev_sysfs(struct rnbd_srv_dev *dev, - struct block_device *bdev, - const char 
*dir_name); + struct block_device *bdev); void rnbd_srv_destroy_dev_sysfs(struct rnbd_srv_dev *dev); int rnbd_srv_create_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); void rnbd_srv_destroy_dev_session_sysfs(struct rnbd_srv_sess_dev *sess_dev); From 4664954c9421ce326bb5c84f175902b03f17237e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:15 +0200 Subject: [PATCH 089/178] ocfs2/cluster: remove the hr_dev_name field from struct o2hb_region Just print the block device name directly using the %pg format specifier. Signed-off-by: Christoph Hellwig Reviewed-by: Joel Becker Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-8-hch@lst.de Signed-off-by: Jens Axboe --- fs/ocfs2/cluster/heartbeat.c | 64 +++++++++++++++++------------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index ea0e70c0fce0..5f83c0c0918c 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -235,8 +235,6 @@ struct o2hb_region { * (hr_steady_iterations == 0) within hr_unsteady_iterations */ atomic_t hr_unsteady_iterations; - char hr_dev_name[BDEVNAME_SIZE]; - unsigned int hr_timeout_ms; /* randomized as the region goes up and down so that a node @@ -287,8 +285,8 @@ static void o2hb_write_timeout(struct work_struct *work) container_of(work, struct o2hb_region, hr_write_timeout_work.work); - mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " - "milliseconds\n", reg->hr_dev_name, + mlog(ML_ERROR, "Heartbeat write timeout to device %pg after %u " + "milliseconds\n", reg->hr_bdev, jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); if (o2hb_global_heartbeat_active()) { @@ -383,9 +381,9 @@ static void o2hb_nego_timeout(struct work_struct *work) if (master_node == o2nm_this_node()) { if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { - printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s).\n", + printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg).\n", o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, - config_item_name(&reg->hr_item), reg->hr_dev_name); + config_item_name(&reg->hr_item), reg->hr_bdev); set_bit(master_node, reg->hr_nego_node_bitmap); } if (memcmp(reg->hr_nego_node_bitmap, live_node_bitmap, @@ -399,8 +397,8 @@ static void o2hb_nego_timeout(struct work_struct *work) return; } - printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%s) is down.\n", - config_item_name(&reg->hr_item), reg->hr_dev_name); + printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%pg) is down.\n", + config_item_name(&reg->hr_item), reg->hr_bdev); /* approve negotiate timeout request. */ o2hb_arm_timeout(reg); @@ -419,9 +417,9 @@ } } else { /* negotiate timeout with master node.
*/ - printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%s), negotiate timeout with node %d.\n", + printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg), negotiate timeout with node %d.\n", o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item), - reg->hr_dev_name, master_node); + reg->hr_bdev, master_node); ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, master_node); if (ret) @@ -437,8 +435,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, struct o2hb_nego_msg *nego_msg; nego_msg = (struct o2hb_nego_msg *)msg->buf; - printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%s).\n", - nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_dev_name); + printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%pg).\n", + nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_bdev); if (nego_msg->node_num < O2NM_MAX_NODES) set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); else @@ -452,8 +450,8 @@ static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, { struct o2hb_region *reg = data; - printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%s).\n", - config_item_name(&reg->hr_item), reg->hr_dev_name); + printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%pg).\n", + config_item_name(&reg->hr_item), reg->hr_bdev); o2hb_arm_timeout(reg); return 0; } @@ -689,8 +687,8 @@ static int o2hb_check_own_slot(struct o2hb_region *reg) else errstr = ERRSTR3; - mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), " "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name, + mlog(ML_ERROR, "%s (%pg): expected(%u:0x%llx, 0x%llx), " "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_bdev, slot->ds_node_num, (unsigned long long)slot->ds_last_generation, (unsigned long long)slot->ds_last_time, hb_block->hb_node, (unsigned long long)le64_to_cpu(hb_block->hb_generation), @@ -863,8 +861,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg) sizeof(o2hb_live_node_bitmap))) goto unlock; - printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n", - config_item_name(&reg->hr_item), reg->hr_dev_name); + printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n", + config_item_name(&reg->hr_item), reg->hr_bdev); set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); @@ -922,8 +920,8 @@ static int o2hb_check_slot(struct o2hb_region *reg, /* The node is live but pushed out a bad crc. We * consider it a transient miss but don't populate any * other values as they may be junk. */ - mlog(ML_ERROR, "Node %d has written a bad crc to %s\n", - slot->ds_node_num, reg->hr_dev_name); + mlog(ML_ERROR, "Node %d has written a bad crc to %pg\n", + slot->ds_node_num, reg->hr_bdev); o2hb_dump_slot(hb_block); slot->ds_equal_samples++; @@ -1002,11 +1000,11 @@ fire_callbacks: slot_dead_ms = le32_to_cpu(hb_block->hb_dead_ms); if (slot_dead_ms && slot_dead_ms != dead_ms) { /* TODO: Perhaps we can fail the region here.
*/ - mlog(ML_ERROR, "Node %d on device %s has a dead count " + mlog(ML_ERROR, "Node %d on device %pg has a dead count " "of %u ms, but our count is %u ms.\n" "Please double check your configuration values " "for 'O2CB_HEARTBEAT_THRESHOLD'\n", - slot->ds_node_num, reg->hr_dev_name, slot_dead_ms, + slot->ds_node_num, reg->hr_bdev, slot_dead_ms, dead_ms); } goto out; @@ -1145,8 +1143,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) /* Do not re-arm the write timeout on I/O error - we * can't be sure that the new block ever made it to * disk */ - mlog(ML_ERROR, "Write error %d on device \"%s\"\n", - write_wc.wc_error, reg->hr_dev_name); + mlog(ML_ERROR, "Write error %d on device \"%pg\"\n", + write_wc.wc_error, reg->hr_bdev); ret = write_wc.wc_error; goto bail; } @@ -1170,9 +1168,9 @@ bail: if (atomic_read(&reg->hr_steady_iterations) != 0) { if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) { printk(KERN_NOTICE "o2hb: Unable to stabilize " - "heartbeat on region %s (%s)\n", + "heartbeat on region %s (%pg)\n", config_item_name(&reg->hr_item), - reg->hr_dev_name); + reg->hr_bdev); atomic_set(&reg->hr_steady_iterations, 0); reg->hr_aborted_start = 1; wake_up(&o2hb_steady_queue); @@ -1494,7 +1492,7 @@ static void o2hb_region_release(struct config_item *item) struct page *page; struct o2hb_region *reg = to_o2hb_region(item); - mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name); + mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg->hr_bdev); kfree(reg->hr_tmp_block); @@ -1641,7 +1639,7 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page) unsigned int ret = 0; if (to_o2hb_region(item)->hr_bdev) - ret = sprintf(page, "%s\n", to_o2hb_region(item)->hr_dev_name); + ret = sprintf(page, "%pg\n", to_o2hb_region(item)->hr_bdev); return ret; } @@ -1798,8 +1796,6 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, goto out2; } - bdevname(reg->hr_bdev, reg->hr_dev_name); - sectsize = bdev_logical_block_size(reg->hr_bdev); if (sectsize != reg->hr_block_bytes) { mlog(ML_ERROR, @@ -1895,8 +1891,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, ret = -EIO; if (hb_task && o2hb_global_heartbeat_active()) - printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n", - config_item_name(&reg->hr_item), reg->hr_dev_name); + printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n", + config_item_name(&reg->hr_item), reg->hr_bdev); out3: if (ret < 0) { @@ -2088,10 +2084,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, quorum_region = 1; clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); spin_unlock(&o2hb_live_lock); - printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n", + printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%pg)\n", ((atomic_read(&reg->hr_steady_iterations) == 0) ? "stopped" : "start aborted"), config_item_name(item), - reg->hr_dev_name); + reg->hr_bdev); } /* From c5b045b9838972cc4c4985a32fa5d35ecf2ab15a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:16 +0200 Subject: [PATCH 090/178] ext4: only initialize mmp_bdevname once mmp_bdevname is currently both initialized nested inside the kthread_run call in ext4_multi_mount_protect and in the kmmpd thread started by it. Lift the initialization out of the kthread_run call in ext4_multi_mount_protect, move the BUILD_BUG_ON next to it and remove the duplicate assignment inside of kmmpd.
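Condensed from the diff below, the shape of the change (sketch only, other arguments as in the code):

	/* before: the field is filled in as a side effect of evaluating
	 * kthread_run()'s argument list, and kmmpd() assigned it again itself
	 */
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
			(int)sizeof(mmp->mmp_bdevname),
			bdevname(bh->b_bdev, mmp->mmp_bdevname));

	/* after: initialize exactly once, then only reference the field */
	bdevname(bh->b_bdev, mmp->mmp_bdevname);
	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
			(int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);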
Signed-off-by: Christoph Hellwig Acked-by: Theodore Ts'o Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-9-hch@lst.de Signed-off-by: Jens Axboe --- fs/ext4/mmp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 79d05e464c43..b7a850b0070b 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -150,8 +150,6 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); - BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); - bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, sizeof(mmp->mmp_nodename)); @@ -372,13 +370,15 @@ skip: EXT4_SB(sb)->s_mmp_bh = bh; + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); + bdevname(bh->b_bdev, mmp->mmp_bdevname); + /* * Start a kernel thread to update the MMP block periodically. */ EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s", (int)sizeof(mmp->mmp_bdevname), - bdevname(bh->b_bdev, - mmp->mmp_bdevname)); + mmp->mmp_bdevname); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { EXT4_SB(sb)->s_mmp_tsk = NULL; ext4_warning(sb, "Unable to create kmmpd thread for %s.", From 900d156bac2bc474cf7c7bee4efbc6c83ec5ae58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 13 Jul 2022 07:53:17 +0200 Subject: [PATCH 091/178] block: remove bdevname Replace the remaining calls of bdevname with snprintf using the %pg format specifier. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20220713055317.1888500-10-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 23 ----------------------- drivers/md/md.c | 2 +- drivers/md/raid1.c | 2 +- drivers/md/raid10.c | 2 +- fs/ext4/mmp.c | 3 ++- fs/jbd2/journal.c | 6 ++++-- include/linux/blkdev.h | 1 - kernel/trace/blktrace.c | 4 ++-- 8 files changed, 11 insertions(+), 32 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 9d30f159c59a..44dfcf67ed96 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -101,29 +101,6 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) } EXPORT_SYMBOL_GPL(set_capacity_and_notify); -/* - * Format the device name of the indicated block device into the supplied buffer - * and return a pointer to that same buffer for convenience. - * - * Note: do not use this in new code, use the %pg specifier to sprintf and - * printk insted. 
- */ -const char *bdevname(struct block_device *bdev, char *buf) -{ - struct gendisk *hd = bdev->bd_disk; - int partno = bdev->bd_partno; - - if (!partno) - snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); - else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) - snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); - else - snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); - - return buf; -} -EXPORT_SYMBOL(bdevname); - static void part_stat_read_all(struct block_device *part, struct disk_stats *stat) { diff --git a/drivers/md/md.c b/drivers/md/md.c index 076255ec9ba1..4be9d8173071 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2438,7 +2438,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) mdname(mddev), mddev->max_disks); return -EBUSY; } - bdevname(rdev->bdev,b); + snprintf(b, sizeof(b), "%pg", rdev->bdev); strreplace(b, '/', '!'); rdev->mddev = mddev; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 258d4eb2d63c..65cd90f0b2a8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1240,7 +1240,7 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, rcu_read_lock(); rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev); if (rdev) - bdevname(rdev->bdev, b); + snprintf(b, sizeof(b), "%pg", rdev->bdev); else strcpy(b, "???"); rcu_read_unlock(); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d589f823feb1..a7dcb1bf6b0a 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1164,7 +1164,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, disk = r10_bio->devs[slot].devnum; err_rdev = rcu_dereference(conf->mirrors[disk].rdev); if (err_rdev) - bdevname(err_rdev->bdev, b); + snprintf(b, sizeof(b), "%pg", err_rdev->bdev); else { strcpy(b, "???"); /* This never gets dereferenced */ diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index b7a850b0070b..b221f313ded6 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -371,7 +371,8 @@ skip: EXT4_SB(sb)->s_mmp_bh = bh; BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); - bdevname(bh->b_bdev, mmp->mmp_bdevname); + snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname), + "%pg", bh->b_bdev); /* * Start a kernel thread to update the MMP block periodically. 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c0cbeeaec2d1..9015f5fa2862 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1465,7 +1465,8 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev, if (!journal) return NULL; - bdevname(journal->j_dev, journal->j_devname); + snprintf(journal->j_devname, sizeof(journal->j_devname), + "%pg", journal->j_dev); strreplace(journal->j_devname, '/', '!'); jbd2_stats_proc_init(journal); @@ -1507,7 +1508,8 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) return NULL; journal->j_inode = inode; - bdevname(journal->j_dev, journal->j_devname); + snprintf(journal->j_devname, sizeof(journal->j_devname), + "%pg", journal->j_dev); p = strreplace(journal->j_devname, '/', '!'); sprintf(p, "-%lu", journal->j_inode->i_ino); jbd2_stats_proc_init(journal); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22c477fadc0f..2775763c51b9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1457,7 +1457,6 @@ static inline void bio_end_io_acct(struct bio *bio, unsigned long start_time) int bdev_read_only(struct block_device *bdev); int set_blocksize(struct block_device *bdev, int size); -const char *bdevname(struct block_device *bdev, char *buffer); int lookup_bdev(const char *pathname, dev_t *dev); void blkdev_show(struct seq_file *seqf, off_t offset); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c584effe5fe9..4752bda1b1a0 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -736,12 +736,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: - bdevname(bdev, b); + snprintf(b, sizeof(b), "%pg", bdev); ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: - bdevname(bdev, b); + snprintf(b, sizeof(b), "%pg", bdev); ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #endif From ff07a02e9e8e6489db841e0c48a5c78e7e78d572 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:27 -0700 Subject: [PATCH 092/178] treewide: Rename enum req_opf into enum req_op The type name enum req_opf is misleading since it suggests that values of this type include both an operation type and flags. Since values of this type represent an operation only, change the type name into enum req_op. Convert the enum req_op documentation into kernel-doc format. Move a few definitions such that the enum req_op documentation occurs just above the enum req_op definition. The name "req_opf" was introduced by commit ef295ecf090d ("block: better op and flags encoding"). 
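As a small sketch of what the rename buys during review (illustrative only; rq is assumed to be a struct request *): a value of the renamed type is always a bare operation, never an operation/flags mix:

	enum req_op op = req_op(rq);	/* rq->cmd_flags & REQ_OP_MASK */

	/* flag bits such as REQ_SYNC can never be folded into 'op' here,
	 * which the old name "req_opf" wrongly suggested was possible
	 */
	bool is_append = (op == REQ_OP_ZONE_APPEND);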
Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-zoned.c | 7 +++---- drivers/block/null_blk/main.c | 9 ++++----- drivers/block/null_blk/null_blk.h | 12 +++++------- drivers/block/null_blk/trace.h | 2 +- drivers/block/null_blk/zoned.c | 4 ++-- drivers/md/dm-integrity.c | 2 +- drivers/nvme/target/zns.c | 4 ++-- drivers/scsi/sd_zbc.c | 2 +- drivers/ufs/core/ufshpb.c | 5 ++--- fs/zonefs/super.c | 5 ++--- fs/zonefs/trace.h | 2 +- include/linux/blk_types.h | 16 ++++++++-------- include/linux/blkdev.h | 2 +- 13 files changed, 33 insertions(+), 39 deletions(-) diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 7c017458d5ce..a264621d4905 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -256,9 +256,8 @@ static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask) * The operation to execute on each zone can be a zone reset, open, close * or finish request. */ -int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, - sector_t sector, sector_t nr_sectors, - gfp_t gfp_mask) +int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, + sector_t sector, sector_t nr_sectors, gfp_t gfp_mask) { struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors = bdev_zone_sectors(bdev); @@ -397,7 +396,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, void __user *argp = (void __user *)arg; struct request_queue *q; struct blk_zone_range zrange; - enum req_opf op; + enum req_op op; int ret; if (!argp) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 4e03a020ee3c..8b224ede2e33 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1310,7 +1310,7 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, } static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, - enum req_opf op, + enum req_op op, sector_t sector, sector_t nr_sectors) { @@ -1381,9 +1381,8 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) } } -blk_status_t null_process_cmd(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, - unsigned int nr_sectors) +blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, + sector_t sector, unsigned int nr_sectors) { struct nullb_device *dev = cmd->nq->dev; blk_status_t ret; @@ -1401,7 +1400,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, } static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, - sector_t nr_sectors, enum req_opf op) + sector_t nr_sectors, enum req_op op) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h index 8359b43842f2..6fbf0a1b2622 100644 --- a/drivers/block/null_blk/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -136,9 +136,8 @@ struct nullb { blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector, sector_t nr_sectors); -blk_status_t null_process_cmd(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, - unsigned int nr_sectors); +blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_op op, + sector_t sector, unsigned int nr_sectors); #ifdef CONFIG_BLK_DEV_ZONED int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q); @@ -146,9 +145,8 @@ int null_register_zoned_dev(struct nullb *nullb); void 
null_free_zoned_dev(struct nullb_device *dev); int null_report_zones(struct gendisk *disk, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); -blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, - sector_t nr_sectors); +blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, + sector_t sector, sector_t nr_sectors); size_t null_zone_valid_read_len(struct nullb *nullb, sector_t sector, unsigned int len); #else @@ -164,7 +162,7 @@ static inline int null_register_zoned_dev(struct nullb *nullb) } static inline void null_free_zoned_dev(struct nullb_device *dev) {} static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, - enum req_opf op, sector_t sector, sector_t nr_sectors) + enum req_op op, sector_t sector, sector_t nr_sectors) { return BLK_STS_NOTSUPP; } diff --git a/drivers/block/null_blk/trace.h b/drivers/block/null_blk/trace.h index 86d6c12c603c..6b2b370e786f 100644 --- a/drivers/block/null_blk/trace.h +++ b/drivers/block/null_blk/trace.h @@ -36,7 +36,7 @@ TRACE_EVENT(nullb_zone_op, TP_ARGS(cmd, zone_no, zone_cond), TP_STRUCT__entry( __array(char, disk, DISK_NAME_LEN) - __field(enum req_opf, op) + __field(enum req_op, op) __field(unsigned int, zone_no) __field(unsigned int, zone_cond) ), diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 64b06caab984..55a69e48ef8b 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -600,7 +600,7 @@ static blk_status_t null_reset_zone(struct nullb_device *dev, return BLK_STS_OK; } -static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, +static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_op op, sector_t sector) { struct nullb_device *dev = cmd->nq->dev; @@ -653,7 +653,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, return ret; } -blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op, +blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_op op, sector_t sector, sector_t nr_sectors) { struct nullb_device *dev; diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 3d5a0ce123c9..148978ad03a8 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -298,7 +298,7 @@ struct dm_integrity_io { struct work_struct work; struct dm_integrity_c *ic; - enum req_opf op; + enum req_op op; bool fua; struct dm_integrity_range range; diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index c0ee21fcab81..b233c0943fec 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -308,7 +308,7 @@ void nvmet_bdev_execute_zone_mgmt_recv(struct nvmet_req *req) queue_work(zbd_wq, &req->z.zmgmt_work); } -static inline enum req_opf zsa_req_op(u8 zsa) +static inline enum req_op zsa_req_op(u8 zsa) { switch (zsa) { case NVME_ZONE_OPEN: @@ -465,7 +465,7 @@ static void nvmet_bdev_zmgmt_send_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, z.zmgmt_work); sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->zms.slba); - enum req_opf op = zsa_req_op(req->cmd->zms.zsa); + enum req_op op = zsa_req_op(req->cmd->zms.zsa); struct block_device *bdev = req->ns->bdev; sector_t zone_sectors = bdev_zone_sectors(bdev); u16 status = NVME_SC_SUCCESS; diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index b8c97456506a..bd15624c6322 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -529,7 +529,7 @@ static unsigned int 
sd_zbc_zone_wp_update(struct scsi_cmnd *cmd, struct request *rq = scsi_cmd_to_rq(cmd); struct scsi_disk *sdkp = scsi_disk(rq->q->disk); unsigned int zno = blk_rq_zone_no(rq); - enum req_opf op = req_op(rq); + enum req_op op = req_op(rq); unsigned long flags; /* diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c index de2bb8401bc4..24f1ee82c215 100644 --- a/drivers/ufs/core/ufshpb.c +++ b/drivers/ufs/core/ufshpb.c @@ -433,9 +433,8 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return 0; } -static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb, - int rgn_idx, enum req_opf dir, - bool atomic) +static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb, int rgn_idx, + enum req_op dir, bool atomic) { struct ufshpb_req *rq; struct request *req; diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 9c0eef1ff32a..a221ddb12da6 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -60,8 +60,7 @@ static void zonefs_account_active(struct inode *inode) } } -static inline int zonefs_zone_mgmt(struct inode *inode, - enum req_opf op) +static inline int zonefs_zone_mgmt(struct inode *inode, enum req_op op) { struct zonefs_inode_info *zi = ZONEFS_I(inode); int ret; @@ -525,7 +524,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) { struct zonefs_inode_info *zi = ZONEFS_I(inode); loff_t old_isize; - enum req_opf op; + enum req_op op; int ret = 0; /* diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h index f369d7d50303..21501da764bd 100644 --- a/fs/zonefs/trace.h +++ b/fs/zonefs/trace.h @@ -20,7 +20,7 @@ #define show_dev(dev) MAJOR(dev), MINOR(dev) TRACE_EVENT(zonefs_zone_mgmt, - TP_PROTO(struct inode *inode, enum req_opf op), + TP_PROTO(struct inode *inode, enum req_op op), TP_ARGS(inode, op), TP_STRUCT__entry( __field(dev_t, dev) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a24d4078fb21..0e6a2af7ed3d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -337,8 +337,12 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; -/* - * Operations and flags common to the bio and request structures. +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +/** + * enum req_op - Operations common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. * * The least significant bit of the operation number indicates the data @@ -350,11 +354,7 @@ typedef __u32 __bitwise blk_mq_req_flags_t; * If a operation does not transfer data the least significant bit has no * meaning. */ -#define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) -#define REQ_FLAG_BITS 24 - -enum req_opf { +enum req_op { /* read sectors from the device */ REQ_OP_READ = 0, /* write sectors to the device */ @@ -509,7 +509,7 @@ static inline bool op_is_discard(unsigned int op) * due to its different handling in the block layer and device response in * case of command failure. 
*/ -static inline bool op_is_zone_mgmt(enum req_opf op) +static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2775763c51b9..ec072a5129bf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -322,7 +322,7 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model); int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int bdev_nr_zones(struct block_device *bdev); -extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); int blk_revalidate_disk_zones(struct gendisk *disk, From 77e7ffd7ad3952909be6a9c599b7d164c8866fec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:28 -0700 Subject: [PATCH 093/178] block: Use enum req_op where appropriate Change the type of the arguments that are used to pass a REQ_OP_* value from int or unsigned int into enum req_op to improve static type checking. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++--- block/blk-mq-debugfs.c | 2 +- block/blk-throttle.c | 7 ++++--- block/blk-wbt.c | 2 +- block/blk.h | 2 +- include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 6 +++--- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 8365996a8ef8..67b8bcfa27f0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static const char *const blk_op_name[] = { * string format. Useful in the debugging and tracing bio or request. For * invalid REQ_OP_XXX it returns string "UNKNOWN". 
*/ -inline const char *blk_op_str(unsigned int op) +inline const char *blk_op_str(enum req_op op) { const char *op_str = "UNKNOWN"; @@ -953,7 +953,7 @@ again: } unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, unsigned int op, + unsigned int sectors, enum req_op op, unsigned long start_time) { const int sgrp = op_stat_group(op); @@ -994,7 +994,7 @@ unsigned long bio_start_io_acct(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_start_io_acct); -void bdev_end_io_acct(struct block_device *bdev, unsigned int op, +void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time) { const int sgrp = op_stat_group(op); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7ee1b13380d0..6cc2411e2d26 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -304,7 +304,7 @@ static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state) int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) { const struct blk_mq_ops *const mq_ops = rq->q->mq_ops; - const unsigned int op = req_op(rq); + const enum req_op op = req_op(rq); const char *op_str = blk_op_str(op); seq_printf(m, "%p {.op=", rq); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 139b2d7a99e2..9f5fe62afff9 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2203,8 +2203,9 @@ out_unlock: #ifdef CONFIG_BLK_DEV_THROTTLING_LOW static void throtl_track_latency(struct throtl_data *td, sector_t size, - int op, unsigned long time) + enum req_op op, unsigned long time) { + const bool rw = op_is_write(op); struct latency_bucket *latency; int index; @@ -2215,10 +2216,10 @@ static void throtl_track_latency(struct throtl_data *td, sector_t size, index = request_bucket_index(size); - latency = get_cpu_ptr(td->latency_buckets[op]); + latency = get_cpu_ptr(td->latency_buckets[rw]); latency[index].total_latency += time; latency[index].samples++; - put_cpu_ptr(td->latency_buckets[op]); + put_cpu_ptr(td->latency_buckets[rw]); } void blk_throtl_stat_add(struct request *rq, u64 time_ns) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 0c119be0e813..7bf09ae06577 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -670,7 +670,7 @@ u64 wbt_default_latency_nsec(struct request_queue *q) static int wbt_data_dir(const struct request *rq) { - const int op = req_op(rq); + const enum req_op op = req_op(rq); if (op == REQ_OP_READ) return READ; diff --git a/block/blk.h b/block/blk.h index b71e22c97d77..c4b084bfe87c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -160,7 +160,7 @@ static inline bool blk_discard_mergable(struct request *req) } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, - int op) + enum req_op op) { if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0e6a2af7ed3d..cce8768bc00b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -522,7 +522,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) } } -static inline int op_stat_group(unsigned int op) +static inline int op_stat_group(enum req_op op) { if (op_is_discard(op)) return STAT_DISCARD; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec072a5129bf..2f13f0062192 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -872,7 +872,7 @@ extern void blk_queue_exit(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); /* Helper to convert REQ_OP_XXX to its 
string format XXX */ -extern const char *blk_op_str(unsigned int op); +extern const char *blk_op_str(enum req_op op); int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); @@ -1434,9 +1434,9 @@ static inline void blk_wake_io_task(struct task_struct *waiter) } unsigned long bdev_start_io_acct(struct block_device *bdev, - unsigned int sectors, unsigned int op, + unsigned int sectors, enum req_op op, unsigned long start_time); -void bdev_end_io_acct(struct block_device *bdev, unsigned int op, +void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); From 86947df3a9236481276e8baadde50a403b02b4d4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:29 -0700 Subject: [PATCH 094/178] block: Change the type of the last .rw_page() argument All .rw_page() callers pass an enum req_op value as last argument. Make this explicit by changing the type of the last argument into enum req_op. See also commit 3f289dcb4b26 ("block: make bdev_ops->rw_page() take a REQ_OP instead of bool"). Cc: Tejun Heo Cc: Minchan Kim Cc: Dan Williams Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-4-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/brd.c | 2 +- drivers/block/zram/zram_drv.c | 2 +- drivers/nvdimm/btt.c | 2 +- drivers/nvdimm/pmem.c | 2 +- include/linux/blkdev.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 9e26d5e769f3..7b82876af36e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -310,7 +310,7 @@ static void brd_submit_bio(struct bio *bio) } static int brd_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, unsigned int op) + struct page *page, enum req_op op) { struct brd_device *brd = bdev->bd_disk->private_data; int err; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e5233c911e43..a35b86c58aa2 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1631,7 +1631,7 @@ static void zram_slot_free_notify(struct block_device *bdev, } static int zram_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, unsigned int op) + struct page *page, enum req_op op) { int offset, ret; u32 index; diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 5e622c0d4b66..dfbf73145d16 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1483,7 +1483,7 @@ static void btt_submit_bio(struct bio *bio) } static int btt_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, unsigned int op) + struct page *page, enum req_op op) { struct btt *btt = bdev->bd_disk->private_data; int rc; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index a72b81fa3242..f36efcc11f67 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -239,7 +239,7 @@ static void pmem_submit_bio(struct bio *bio) } static int pmem_rw_page(struct block_device *bdev, sector_t sector, - struct page *page, unsigned int op) + struct page *page, enum req_op op) { struct pmem_device *pmem = bdev->bd_disk->private_data; blk_status_t rc; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f13f0062192..ca2ff113ea00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1381,7 +1381,7 @@ struct block_device_operations { unsigned int flags); 
int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); - int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int); + int (*rw_page)(struct block_device *, sector_t, struct page *, enum req_op); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); unsigned int (*check_events) (struct gendisk *disk, From 2d9b02be73ba8efba406b399a722b4e33614dd0e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:30 -0700 Subject: [PATCH 095/178] block: Change the type of req_op() and bio_op() into enum req_op Improve static type checking by changing the type of the value returned by req_op() and bio_op() from unsigned int into enum req_op. Insert 'default: break;' in switch statements on the enum req_op type to prevent the compiler from warning about these switch statements. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Cc: Tim Waugh Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-5-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/blk-merge.c | 2 ++ drivers/block/paride/pd.c | 2 ++ drivers/md/dm.c | 2 ++ include/linux/blk-mq.h | 6 ++++-- include/linux/blk_types.h | 6 ++++-- 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index 5abf5aa5a5f0..de178a8b4c82 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -405,6 +405,8 @@ unsigned int blk_recalc_rq_segments(struct request *rq) return 1; case REQ_OP_WRITE_ZEROES: return 0; + default: + break; } rq_for_each_bvec(bv, rq, iter) diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index c8c14c6f5c3a..f8a75bc90f70 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c @@ -501,6 +501,8 @@ static enum action do_pd_io_start(void) return do_pd_read_start(); else return do_pd_write_start(); + default: + break; } return Fail; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 33d3799bb66e..6c21922b87d0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1542,6 +1542,8 @@ static blk_status_t __process_abnormal_io(struct clone_info *ci, case REQ_OP_WRITE_ZEROES: num_bios = ti->num_write_zeroes_bios; break; + default: + break; } /* diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d74f6a6b7e69..677195de0663 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -198,8 +198,10 @@ struct request { void *end_io_data; }; -#define req_op(req) \ - ((req)->cmd_flags & REQ_OP_MASK) +static inline enum req_op req_op(const struct request *req) +{ + return req->cmd_flags & REQ_OP_MASK; +} static inline bool blk_rq_is_passthrough(struct request *rq) { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cce8768bc00b..e66cbe377ae8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -463,8 +463,10 @@ enum stat_group { NR_STAT_GROUPS }; -#define bio_op(bio) \ - ((bio)->bi_opf & REQ_OP_MASK) +static inline enum req_op bio_op(const struct bio *bio) +{ + return bio->bi_opf & REQ_OP_MASK; +} /* obsolete, don't use in new code */ static inline void bio_set_op_attrs(struct bio *bio, unsigned op, From 342a72a334073f163da924b69c3d3fb4685eb33a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:31 -0700 Subject: [PATCH 096/178] block: Introduce the type
blk_opf_t for the request operation and flags (REQ_OP_* and REQ_*). This type will be used to improve documentation of the block layer code and also to allow sparse to verify whether request flags are used correctly. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-6-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 93 +++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 44 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e66cbe377ae8..1ef99790f6ed 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -240,6 +240,8 @@ static inline void bio_issue_init(struct bio_issue *issue, ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } +typedef __u32 __bitwise blk_opf_t; + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U @@ -250,7 +252,7 @@ typedef unsigned int blk_qc_t; struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned int bi_opf; /* bottom bits REQ_OP, top bits + blk_opf_t bi_opf; /* bottom bits REQ_OP, top bits * req_flags. */ unsigned short bi_flags; /* BIO_* below */ @@ -338,7 +340,7 @@ enum { typedef __u32 __bitwise blk_mq_req_flags_t; #define REQ_OP_BITS 8 -#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_OP_MASK (__force blk_opf_t)((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 /** @@ -356,35 +358,35 @@ typedef __u32 __bitwise blk_mq_req_flags_t; */ enum req_op { /* read sectors from the device */ - REQ_OP_READ = 0, + REQ_OP_READ = (__force blk_opf_t)0, /* write sectors to the device */ - REQ_OP_WRITE = 1, + REQ_OP_WRITE = (__force blk_opf_t)1, /* flush the volatile write cache */ - REQ_OP_FLUSH = 2, + REQ_OP_FLUSH = (__force blk_opf_t)2, /* discard sectors */ - REQ_OP_DISCARD = 3, + REQ_OP_DISCARD = (__force blk_opf_t)3, /* securely erase sectors */ - REQ_OP_SECURE_ERASE = 5, + REQ_OP_SECURE_ERASE = (__force blk_opf_t)5, /* write the zero filled sector many times */ - REQ_OP_WRITE_ZEROES = 9, + REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = 10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, /* Close a zone */ - REQ_OP_ZONE_CLOSE = 11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = 12, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)12, /* write data at the current zone write pointer */ - REQ_OP_ZONE_APPEND = 13, + REQ_OP_ZONE_APPEND = (__force blk_opf_t)13, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = 15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)15, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = 17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, /* Driver private requests */ - REQ_OP_DRV_IN = 34, - REQ_OP_DRV_OUT = 35, + REQ_OP_DRV_IN = (__force blk_opf_t)34, + REQ_OP_DRV_OUT = (__force blk_opf_t)35, - REQ_OP_LAST, + REQ_OP_LAST = (__force blk_opf_t)36, }; enum req_flag_bits { @@ -425,28 +427,31 @@ enum req_flag_bits { __REQ_NR_BITS, /* stops here */ }; -#define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) -#define REQ_SYNC (1ULL << __REQ_SYNC) -#define REQ_META (1ULL << __REQ_META) -#define REQ_PRIO (1ULL << __REQ_PRIO) -#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_IDLE (1ULL << __REQ_IDLE) -#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) 
-#define REQ_FUA (1ULL << __REQ_FUA) -#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) -#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) -#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) -#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT) +#define REQ_FAILFAST_DEV \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER \ + (__force blk_opf_t)(1ULL << __REQ_FAILFAST_DRIVER) +#define REQ_SYNC (__force blk_opf_t)(1ULL << __REQ_SYNC) +#define REQ_META (__force blk_opf_t)(1ULL << __REQ_META) +#define REQ_PRIO (__force blk_opf_t)(1ULL << __REQ_PRIO) +#define REQ_NOMERGE (__force blk_opf_t)(1ULL << __REQ_NOMERGE) +#define REQ_IDLE (__force blk_opf_t)(1ULL << __REQ_IDLE) +#define REQ_INTEGRITY (__force blk_opf_t)(1ULL << __REQ_INTEGRITY) +#define REQ_FUA (__force blk_opf_t)(1ULL << __REQ_FUA) +#define REQ_PREFLUSH (__force blk_opf_t)(1ULL << __REQ_PREFLUSH) +#define REQ_RAHEAD (__force blk_opf_t)(1ULL << __REQ_RAHEAD) +#define REQ_BACKGROUND (__force blk_opf_t)(1ULL << __REQ_BACKGROUND) +#define REQ_NOWAIT (__force blk_opf_t)(1ULL << __REQ_NOWAIT) +#define REQ_CGROUP_PUNT (__force blk_opf_t)(1ULL << __REQ_CGROUP_PUNT) -#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_POLLED (1ULL << __REQ_POLLED) -#define REQ_ALLOC_CACHE (1ULL << __REQ_ALLOC_CACHE) +#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP) +#define REQ_POLLED (__force blk_opf_t)(1ULL << __REQ_POLLED) +#define REQ_ALLOC_CACHE (__force blk_opf_t)(1ULL << __REQ_ALLOC_CACHE) -#define REQ_DRV (1ULL << __REQ_DRV) -#define REQ_SWAP (1ULL << __REQ_SWAP) +#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV) +#define REQ_SWAP (__force blk_opf_t)(1ULL << __REQ_SWAP) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) @@ -469,22 +474,22 @@ static inline enum req_op bio_op(const struct bio *bio) } /* obsolete, don't use in new code */ -static inline void bio_set_op_attrs(struct bio *bio, unsigned op, - unsigned op_flags) +static inline void bio_set_op_attrs(struct bio *bio, enum req_op op, + blk_opf_t op_flags) { bio->bi_opf = op | op_flags; } -static inline bool op_is_write(unsigned int op) +static inline bool op_is_write(blk_opf_t op) { - return (op & 1); + return !!(op & (__force blk_opf_t)1); } /* * Check if the bio or request is one that needs special treatment in the * flush state machine. */ -static inline bool op_is_flush(unsigned int op) +static inline bool op_is_flush(blk_opf_t op) { return op & (REQ_FUA | REQ_PREFLUSH); } @@ -494,13 +499,13 @@ static inline bool op_is_flush(unsigned int op) * PREFLUSH flag. Other operations may be marked as synchronous using the * REQ_SYNC flag. */ -static inline bool op_is_sync(unsigned int op) +static inline bool op_is_sync(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } -static inline bool op_is_discard(unsigned int op) +static inline bool op_is_discard(blk_opf_t op) { return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } From 16458cf3bd15e5624205df6e8a76b9a5363555f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:32 -0700 Subject: [PATCH 097/178] block: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags or a bitwise combination of a request operation and request flags. 
Rename the function arguments and also a structure member that hold a request operation and flags from 'rw' into 'opf'. This patch does not change any functionality. Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Damien Le Moal Cc: Johannes Thumshirn Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-7-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/bio.c | 10 +++++----- block/blk-cgroup-rwstat.h | 8 ++++---- block/blk-core.c | 2 +- block/blk-flush.c | 6 +++--- block/blk-merge.c | 6 +++--- block/blk-mq-debugfs.c | 4 ++-- block/blk-mq.c | 15 ++++++++------- block/blk-mq.h | 6 +++--- block/blk-wbt.c | 16 ++++++++-------- block/elevator.h | 2 +- block/fops.c | 12 ++++++------ include/linux/bio.h | 10 +++++----- include/linux/blk-mq.h | 6 +++--- include/linux/blkdev.h | 2 +- 14 files changed, 53 insertions(+), 52 deletions(-) diff --git a/block/bio.c b/block/bio.c index 888ee81ea303..6f9f883f9a65 100644 --- a/block/bio.c +++ b/block/bio.c @@ -239,7 +239,7 @@ static void bio_free(struct bio *bio) * when IO has completed, or when the bio is released. */ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, - unsigned short max_vecs, unsigned int opf) + unsigned short max_vecs, blk_opf_t opf) { bio->bi_next = NULL; bio->bi_bdev = bdev; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(bio_init); * preserved are the ones that are initialized by bio_alloc_bioset(). See * comment in struct bio. */ -void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf) +void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf) { bio_uninit(bio); memset(bio, 0, BIO_RESET_BYTES); @@ -341,7 +341,7 @@ void bio_chain(struct bio *bio, struct bio *parent) EXPORT_SYMBOL(bio_chain); struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, - unsigned int nr_pages, unsigned int opf, gfp_t gfp) + unsigned int nr_pages, blk_opf_t opf, gfp_t gfp) { struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp); @@ -409,7 +409,7 @@ static void punt_bios_to_rescuer(struct bio_set *bs) } static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, gfp_t gfp, + unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp, struct bio_set *bs) { struct bio_alloc_cache *cache; @@ -468,7 +468,7 @@ static struct bio *bio_alloc_percpu_cache(struct block_device *bdev, * Returns: Pointer to new bio on success, NULL on failure. */ struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - unsigned int opf, gfp_t gfp_mask, + blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs) { gfp_t saved_gfp = gfp_mask; diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h index 9f2723b34b75..022527b0b043 100644 --- a/block/blk-cgroup-rwstat.h +++ b/block/blk-cgroup-rwstat.h @@ -59,20 +59,20 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, * caller is responsible for synchronizing calls to this function. 
*/ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - unsigned int op, uint64_t val) + blk_opf_t opf, uint64_t val) { struct percpu_counter *cnt; - if (op_is_discard(op)) + if (op_is_discard(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD]; - else if (op_is_write(op)) + else if (op_is_write(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH); - if (op_is_sync(op)) + if (op_is_sync(opf)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; diff --git a/block/blk-core.c b/block/blk-core.c index 67b8bcfa27f0..123468b9d2e4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,7 +1203,7 @@ EXPORT_SYMBOL_GPL(blk_io_schedule); int __init blk_dev_init(void) { - BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS)); + BUILD_BUG_ON((__force u32)REQ_OP_LAST >= (1 << REQ_OP_BITS)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * sizeof_field(struct request, cmd_flags)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * diff --git a/block/blk-flush.c b/block/blk-flush.c index c68968724870..d20a0c6b2c66 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -94,7 +94,7 @@ enum { }; static void blk_kick_flush(struct request_queue *q, - struct blk_flush_queue *fq, unsigned int flags); + struct blk_flush_queue *fq, blk_opf_t flags); static inline struct blk_flush_queue * blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx) @@ -173,7 +173,7 @@ static void blk_flush_complete_seq(struct request *rq, { struct request_queue *q = rq->q; struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; - unsigned int cmd_flags; + blk_opf_t cmd_flags; BUG_ON(rq->flush.seq & seq); rq->flush.seq |= seq; @@ -290,7 +290,7 @@ bool is_flush_rq(struct request *rq) * */ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, - unsigned int flags) + blk_opf_t flags) { struct list_head *pending = &fq->flush_queue[fq->flush_pending_idx]; struct request *first_rq = diff --git a/block/blk-merge.c b/block/blk-merge.c index de178a8b4c82..3c3f785f558a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -712,7 +712,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, */ void blk_rq_set_mixed_merge(struct request *rq) { - unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; struct bio *bio; if (rq->rq_flags & RQF_MIXED_MERGE) @@ -928,7 +928,7 @@ enum bio_merge_status { static enum bio_merge_status bio_attempt_back_merge(struct request *req, struct bio *bio, unsigned int nr_segs) { - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; + const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK; if (!ll_back_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; @@ -952,7 +952,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req, static enum bio_merge_status bio_attempt_front_merge(struct request *req, struct bio *bio, unsigned int nr_segs) { - const int ff = bio->bi_opf & REQ_FAILFAST_MASK; + const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK; if (!ll_front_merge_fn(req, bio, nr_segs)) return BIO_MERGE_FAILED; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 6cc2411e2d26..8559cea7f300 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -313,8 +313,8 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq) else seq_printf(m, "%s", op_str); seq_puts(m, ", .cmd_flags="); - blk_flags_show(m, 
rq->cmd_flags & ~REQ_OP_MASK, cmd_flag_name, - ARRAY_SIZE(cmd_flag_name)); + blk_flags_show(m, (__force unsigned int)(rq->cmd_flags & ~REQ_OP_MASK), + cmd_flag_name, ARRAY_SIZE(cmd_flag_name)); seq_puts(m, ", .rq_flags="); blk_flags_show(m, (__force unsigned int)rq->rq_flags, rqf_name, ARRAY_SIZE(rqf_name)); diff --git a/block/blk-mq.c b/block/blk-mq.c index f1b84e20b1a9..d716b7f3763f 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -510,13 +510,13 @@ retry: alloc_time_ns); } -struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, - .cmd_flags = op, + .cmd_flags = opf, .nr_tags = 1, }; struct request *rq; @@ -540,12 +540,12 @@ out_queue_exit: EXPORT_SYMBOL(blk_mq_alloc_request); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags, unsigned int hctx_idx) + blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx) { struct blk_mq_alloc_data data = { .q = q, .flags = flags, - .cmd_flags = op, + .cmd_flags = opf, .nr_tags = 1, }; u64 alloc_time_ns = 0; @@ -660,7 +660,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) { printk(KERN_INFO "%s: dev %s: flags=%llx\n", msg, rq->q->disk ? rq->q->disk->disk_name : "?", - (unsigned long long) rq->cmd_flags); + (__force unsigned long long) rq->cmd_flags); printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", (unsigned long long)blk_rq_pos(rq), @@ -713,8 +713,9 @@ static void blk_print_req_error(struct request *req, blk_status_t status) "phys_seg %u prio class %u\n", blk_status_to_str(status), req->q->disk ? req->q->disk->disk_name : "?", - blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)), - req->cmd_flags & ~REQ_OP_MASK, + blk_rq_pos(req), (__force u32)req_op(req), + blk_op_str(req_op(req)), + (__force u32)(req->cmd_flags & ~REQ_OP_MASK), req->nr_phys_segments, IOPRIO_PRIO_CLASS(req->ioprio)); } diff --git a/block/blk-mq.h b/block/blk-mq.h index e694ec67d646..8ca453ac243d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -86,7 +86,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue * return xa_load(&q->hctx_table, q->tag_set->map[type].mq_map[cpu]); } -static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf) +static inline enum hctx_type blk_mq_get_hctx_type(blk_opf_t opf) { enum hctx_type type = HCTX_TYPE_DEFAULT; @@ -107,7 +107,7 @@ static inline enum hctx_type blk_mq_get_hctx_type(unsigned int opf) * @ctx: software queue cpu ctx */ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, - unsigned int opf, + blk_opf_t opf, struct blk_mq_ctx *ctx) { return ctx->hctxs[blk_mq_get_hctx_type(opf)]; @@ -152,7 +152,7 @@ struct blk_mq_alloc_data { struct request_queue *q; blk_mq_req_flags_t flags; unsigned int shallow_depth; - unsigned int cmd_flags; + blk_opf_t cmd_flags; req_flags_t rq_flags; /* allocate multiple requests/tags in one go */ diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 7bf09ae06577..f2e4bf1dca47 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -451,7 +451,7 @@ static bool close_io(struct rq_wb *rwb) #define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO) -static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) +static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf) { unsigned int limit; @@ -462,7 +462,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long 
rw) if (!rwb_enabled(rwb)) return UINT_MAX; - if ((rw & REQ_OP_MASK) == REQ_OP_DISCARD) + if ((opf & REQ_OP_MASK) == REQ_OP_DISCARD) return rwb->wb_background; /* @@ -473,9 +473,9 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) * the idle limit, or go to normal if we haven't had competing * IO for a bit. */ - if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) + if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) limit = rwb->rq_depth.max_depth; - else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { + else if ((opf & REQ_BACKGROUND) || close_io(rwb)) { /* * If less than 100ms since we completed unrelated IO, * limit us to half the depth for background writeback. @@ -490,13 +490,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) struct wbt_wait_data { struct rq_wb *rwb; enum wbt_flags wb_acct; - unsigned long rw; + blk_opf_t opf; }; static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data) { struct wbt_wait_data *data = private_data; - return rq_wait_inc_below(rqw, get_limit(data->rwb, data->rw)); + return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf)); } static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data) @@ -510,13 +510,13 @@ static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data) * the timer to kick off queuing again. */ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, - unsigned long rw) + blk_opf_t opf) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); struct wbt_wait_data data = { .rwb = rwb, .wb_acct = wb_acct, - .rw = rw, + .opf = opf, }; rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb); diff --git a/block/elevator.h b/block/elevator.h index 16cd8bdedb7e..3f0593b3bf9d 100644 --- a/block/elevator.h +++ b/block/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); - void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *); + void (*limit_depth)(blk_opf_t, struct blk_mq_alloc_data *); void (*prepare_request)(struct request *); void (*finish_request)(struct request *); void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); diff --git a/block/fops.c b/block/fops.c index 86d3cab9bf93..29066ac5a2fa 100644 --- a/block/fops.c +++ b/block/fops.c @@ -32,14 +32,14 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, return 0; } -static unsigned int dio_bio_write_op(struct kiocb *iocb) +static blk_opf_t dio_bio_write_op(struct kiocb *iocb) { - unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; /* avoid the need for a I/O completion work item */ if (iocb->ki_flags & IOCB_DSYNC) - op |= REQ_FUA; - return op; + opf |= REQ_FUA; + return opf; } static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos, @@ -175,7 +175,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct blkdev_dio *dio; struct bio *bio; bool is_read = (iov_iter_rw(iter) == READ), is_sync; - unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb); + blk_opf_t opf = is_read ? 
REQ_OP_READ : dio_bio_write_op(iocb); loff_t pos = iocb->ki_pos; int ret = 0; @@ -297,7 +297,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, { struct block_device *bdev = iocb->ki_filp->private_data; bool is_read = iov_iter_rw(iter) == READ; - unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb); + blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb); struct blkdev_dio *dio; struct bio *bio; loff_t pos = iocb->ki_pos; diff --git a/include/linux/bio.h b/include/linux/bio.h index 992ee987f273..ca22b06700a9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -405,7 +405,7 @@ extern void bioset_exit(struct bio_set *); extern int biovec_init_pool(mempool_t *pool, int pool_entries); struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, - unsigned int opf, gfp_t gfp_mask, + blk_opf_t opf, gfp_t gfp_mask, struct bio_set *bs); struct bio *bio_kmalloc(unsigned short nr_vecs, gfp_t gfp_mask); extern void bio_put(struct bio *); @@ -418,7 +418,7 @@ int bio_init_clone(struct block_device *bdev, struct bio *bio, extern struct bio_set fs_bio_set; static inline struct bio *bio_alloc(struct block_device *bdev, - unsigned short nr_vecs, unsigned int opf, gfp_t gfp_mask) + unsigned short nr_vecs, blk_opf_t opf, gfp_t gfp_mask) { return bio_alloc_bioset(bdev, nr_vecs, opf, gfp_mask, &fs_bio_set); } @@ -456,9 +456,9 @@ struct request_queue; extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, - unsigned short max_vecs, unsigned int opf); + unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); -void bio_reset(struct bio *bio, struct block_device *bdev, unsigned int opf); +void bio_reset(struct bio *bio, struct block_device *bdev, blk_opf_t opf); void bio_chain(struct bio *, struct bio *); int bio_add_page(struct bio *, struct page *, unsigned len, unsigned off); @@ -789,6 +789,6 @@ static inline void bio_clear_polled(struct bio *bio) } struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev, - unsigned int nr_pages, unsigned int opf, gfp_t gfp); + unsigned int nr_pages, blk_opf_t opf, gfp_t gfp); #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 677195de0663..effee1dc715a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -80,7 +80,7 @@ struct request { struct blk_mq_ctx *mq_ctx; struct blk_mq_hw_ctx *mq_hctx; - unsigned int cmd_flags; /* op and common flags */ + blk_opf_t cmd_flags; /* op and common flags */ req_flags_t rq_flags; int tag; @@ -715,10 +715,10 @@ enum { BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2), }; -struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, +struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags, + blk_opf_t opf, blk_mq_req_flags_t flags, unsigned int hctx_idx); /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ca2ff113ea00..d04bdf549efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ static inline int blk_validate_block_size(unsigned long bsize) return 0; } -static inline bool blk_op_is_passthrough(unsigned int op) +static inline bool blk_op_is_passthrough(blk_opf_t op) { op &= REQ_OP_MASK; return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; From dc469ba2e790cb0a335e2650b701639752ff65cd Mon Sep 
17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:33 -0700 Subject: [PATCH 098/178] block/bfq: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags or a bitwise combination of a request operation and request flags. Rename those variables from 'op' into 'opf'. This patch does not change any functionality. Cc: Jan Kara Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-8-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 26 +++++++++++++------------- block/bfq-iosched.c | 16 ++++++++-------- block/bfq-iosched.h | 8 ++++---- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 9fc605791b1e..30b15a9a47c4 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -220,46 +220,46 @@ void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) + blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.queued, op, 1); + blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == ((struct bfq_data *)bfqg->bfqd)->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.queued, op, -1); + blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { - blkg_rwstat_add(&bfqg->stats.merged, op, 1); + blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op) + u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = ktime_get_ns(); if (now > io_start_time_ns) - blkg_rwstat_add(&stats->service_time, op, + blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) - blkg_rwstat_add(&stats->wait_time, op, + blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op) { } -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op) { } -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op) { } + blk_opf_t opf) { } +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op) { } + u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { } diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e6d7e6b01a05..c740b41fe0a4 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -668,19 +668,19 @@ static bool bfqq_request_over_limit(struct bfq_queue *bfqq, int limit) * significantly affect service guarantees coming from the BFQ scheduling * algorithm. 
*/ -static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +static void bfq_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct bfq_data *bfqd = data->q->elevator->elevator_data; struct bfq_io_cq *bic = bfq_bic_lookup(data->q); - struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(op)) : NULL; + struct bfq_queue *bfqq = bic ? bic_to_bfqq(bic, op_is_sync(opf)) : NULL; int depth; unsigned limit = data->q->nr_requests; /* Sync reads have full depth available */ - if (op_is_sync(op) && !op_is_write(op)) { + if (op_is_sync(opf) && !op_is_write(opf)) { depth = 0; } else { - depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)]; + depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(opf)]; limit = (limit * depth) >> bfqd->full_depth_shift; } @@ -693,7 +693,7 @@ static void bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) depth = 1; bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u", - __func__, bfqd->wr_busy_queues, op_is_sync(op), depth); + __func__, bfqd->wr_busy_queues, op_is_sync(opf), depth); if (depth) data->shallow_depth = depth; } @@ -6104,7 +6104,7 @@ static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) static void bfq_update_insert_stats(struct request_queue *q, struct bfq_queue *bfqq, bool idle_timer_disabled, - unsigned int cmd_flags) + blk_opf_t cmd_flags) { if (!bfqq) return; @@ -6129,7 +6129,7 @@ static void bfq_update_insert_stats(struct request_queue *q, static inline void bfq_update_insert_stats(struct request_queue *q, struct bfq_queue *bfqq, bool idle_timer_disabled, - unsigned int cmd_flags) {} + blk_opf_t cmd_flags) {} #endif /* CONFIG_BFQ_CGROUP_DEBUG */ static struct bfq_queue *bfq_init_rq(struct request *rq); @@ -6141,7 +6141,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, struct bfq_data *bfqd = q->elevator->elevator_data; struct bfq_queue *bfqq; bool idle_timer_disabled = false; - unsigned int cmd_flags; + blk_opf_t cmd_flags; LIST_HEAD(free); #ifdef CONFIG_BFQ_GROUP_IOSCHED diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index ca8177d7bf7c..ad8e513d7e87 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -994,11 +994,11 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg); void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq); void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, - unsigned int op); -void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op); -void bfqg_stats_update_io_merged(struct bfq_group *bfqg, unsigned int op); + blk_opf_t opf); +void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf); +void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf); void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, - u64 io_start_time_ns, unsigned int op); + u64 io_start_time_ns, blk_opf_t opf); void bfqg_stats_update_dequeue(struct bfq_group *bfqg); void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg); void bfqg_stats_update_idle_time(struct bfq_group *bfqg); From f8359efe4742a39b4ece554ab9d7e5f03c4fff83 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:34 -0700 Subject: [PATCH 099/178] block/mq-deadline: Use the new blk_opf_t type Use the new blk_opf_t type for an argument that represents a bitwise combination of a request operation and request flags. Rename that argument from 'op' into 'opf'. This patch does not change any functionality. 
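
(Aside for readers new to the type: a minimal sketch of the checking that blk_opf_t enables, assuming the usual sparse __bitwise semantics. The helper below is hypothetical, not part of this series.)

	/* blk_opf_t is declared __bitwise, so sparse rejects bare integers
	 * where an operation/flags value is expected, while REQ_OP_* and
	 * REQ_* constants, which already carry the type, pass cleanly.
	 */
	static void submit_sketch(blk_opf_t opf);	/* hypothetical helper */

	static void caller_sketch(void)
	{
		submit_sketch(REQ_OP_WRITE | REQ_SYNC);	/* ok: typed constants */
		submit_sketch(1);	/* sparse: incorrect type in argument 1 */
	}
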
Cc: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-9-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/mq-deadline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 1a9e835e816c..5639921dfa92 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -543,12 +543,12 @@ unlock: * Called by __blk_mq_alloc_request(). The shallow_depth value set by this * function is used by __blk_mq_get_tag(). */ -static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { struct deadline_data *dd = data->q->elevator->elevator_data; /* Do not throttle synchronous reads. */ - if (op_is_sync(op) && !op_is_write(op)) + if (op_is_sync(opf) && !op_is_write(opf)) return; /* From d625fecd8af84ac669075caf1941ff0d1995de56 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:35 -0700 Subject: [PATCH 100/178] block/kyber: Use the new blk_opf_t type Use the new blk_opf_t type for arguments that represent a bitwise combination of a request operation and request flags. Rename those arguments from 'op' into 'opf'. This patch does not change any functionality. Cc: Omar Sandoval Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-10-bvanassche@acm.org Signed-off-by: Jens Axboe --- block/kyber-iosched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 8f7c745b4a57..b05357bced99 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -195,9 +195,9 @@ struct kyber_hctx_data { static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags, void *key); -static unsigned int kyber_sched_domain(unsigned int op) +static unsigned int kyber_sched_domain(blk_opf_t opf) { - switch (op & REQ_OP_MASK) { + switch (opf & REQ_OP_MASK) { case REQ_OP_READ: return KYBER_READ; case REQ_OP_WRITE: @@ -553,13 +553,13 @@ static void rq_clear_domain_token(struct kyber_queue_data *kqd, } } -static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +static void kyber_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data) { /* * We use the scheduler tags as per-hardware queue queueing tokens. * Async requests can be limited at this stage. */ - if (!op_is_sync(op)) { + if (!op_is_sync(opf)) { struct kyber_queue_data *kqd = data->q->elevator->elevator_data; data->shallow_depth = kqd->async_depth; From 22c80aac882f712897b88b7ea8f5a74ea19019df Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:36 -0700 Subject: [PATCH 101/178] blktrace: Trace remapped requests correctly Trace the remapped operation and its flags instead of only the data direction of remapped operations. This issue was detected by analyzing the warnings reported by sparse related to the new blk_opf_t type. 
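
For context, a sketch of why the old arguments lost information; the macro shown mirrors the rq_data_dir() definition in blkdev.h and is illustrative only:

	/* rq_data_dir() reduces the opcode to a data direction: */
	#define rq_data_dir_sketch(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
	/*
	 * so a BLK_TA_REMAP event for e.g. REQ_OP_DISCARD used to be recorded
	 * as a plain WRITE with zero flags; passing req_op(rq) together with
	 * rq->cmd_flags preserves the real operation and its flags.
	 */
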
Reviewed-by: Jun'ichi Nomura Cc: Mike Snitzer Cc: Mike Christie Cc: Li Zefan Cc: Chaitanya Kulkarni Fixes: 1b9a9ab78b0a ("blktrace: use op accessors") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-11-bvanassche@acm.org Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 4752bda1b1a0..4327b51da403 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1058,7 +1058,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - rq_data_dir(rq), 0, BLK_TA_REMAP, 0, + req_op(rq), rq->cmd_flags, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } From 919dbca8670d0f7828dfbb2f9b434ac22dca8d2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:37 -0700 Subject: [PATCH 102/178] blktrace: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for a function argument that represents a combination of a request operation and request flags. Rename that argument from 'op' into 'opf' to make its role more clear. Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Li Zefan Cc: Chaitanya Kulkarni Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-12-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 3 ++- kernel/trace/blktrace.c | 51 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index f6f9b544365a..cfbda114348c 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -7,6 +7,7 @@ #include #include #include +#include #if defined(CONFIG_BLK_DEV_IO_TRACE) @@ -105,7 +106,7 @@ struct compat_blk_user_trace_setup { #endif -void blk_fill_rwbs(char *rwbs, unsigned int op); +void blk_fill_rwbs(char *rwbs, blk_opf_t opf); static inline sector_t blk_rq_trace_sector(struct request *rq) { diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 4327b51da403..150058f5daa9 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -205,7 +205,7 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), #define BLK_TC_PREFLUSH BLK_TC_FLUSH /* The ilog2() calls fall out because they're constant */ -#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ +#define MASK_TC_BIT(rw, __name) ((__force u32)(rw & REQ_ ## __name) << \ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* @@ -213,8 +213,8 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), * blk_io_trace structure and places it in a per-cpu subbuffer. */ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, - int op, int op_flags, u32 what, int error, int pdu_len, - void *pdu_data, u64 cgid) + const blk_opf_t opf, u32 what, int error, + int pdu_len, void *pdu_data, u64 cgid) { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; @@ -227,16 +227,17 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; + const enum req_op op = opf & REQ_OP_MASK; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[op_is_write(op) ? 
WRITE : READ]; - what |= MASK_TC_BIT(op_flags, SYNC); - what |= MASK_TC_BIT(op_flags, RAHEAD); - what |= MASK_TC_BIT(op_flags, META); - what |= MASK_TC_BIT(op_flags, PREFLUSH); - what |= MASK_TC_BIT(op_flags, FUA); + what |= MASK_TC_BIT(opf, SYNC); + what |= MASK_TC_BIT(opf, RAHEAD); + what |= MASK_TC_BIT(opf, META); + what |= MASK_TC_BIT(opf, PREFLUSH); + what |= MASK_TC_BIT(opf, FUA); if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) @@ -842,9 +843,8 @@ static void blk_add_trace_rq(struct request *rq, blk_status_t error, else what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), - rq->cmd_flags, what, blk_status_to_errno(error), 0, - NULL, cgid); + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, rq->cmd_flags, + what, blk_status_to_errno(error), 0, NULL, cgid); rcu_read_unlock(); } @@ -903,7 +903,7 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, what, error, 0, NULL, + bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } @@ -949,7 +949,7 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) - __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); rcu_read_unlock(); } @@ -969,7 +969,7 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, else what = BLK_TA_UNPLUG_TIMER; - __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); + __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } rcu_read_unlock(); } @@ -985,8 +985,7 @@ static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) __be64 rpdu = cpu_to_be64(pdu); __blk_add_trace(bt, bio->bi_iter.bi_sector, - bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, - BLK_TA_SPLIT, + bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_SPLIT, blk_status_to_errno(bio->bi_status), sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); @@ -1022,7 +1021,7 @@ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio_op(bio), bio->bi_opf, BLK_TA_REMAP, + bio->bi_opf, BLK_TA_REMAP, blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); @@ -1058,7 +1057,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - req_op(rq), rq->cmd_flags, BLK_TA_REMAP, 0, + rq->cmd_flags, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } @@ -1084,7 +1083,7 @@ void blk_add_driver_data(struct request *rq, void *data, size_t len) return; } - __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, + __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); @@ -1881,14 +1880,14 @@ out: * caller with resulting string. 
* **/ -void blk_fill_rwbs(char *rwbs, unsigned int op) +void blk_fill_rwbs(char *rwbs, blk_opf_t opf) { int i = 0; - if (op & REQ_PREFLUSH) + if (opf & REQ_PREFLUSH) rwbs[i++] = 'F'; - switch (op & REQ_OP_MASK) { + switch (opf & REQ_OP_MASK) { case REQ_OP_WRITE: rwbs[i++] = 'W'; break; @@ -1909,13 +1908,13 @@ void blk_fill_rwbs(char *rwbs, unsigned int op) rwbs[i++] = 'N'; } - if (op & REQ_FUA) + if (opf & REQ_FUA) rwbs[i++] = 'F'; - if (op & REQ_RAHEAD) + if (opf & REQ_RAHEAD) rwbs[i++] = 'A'; - if (op & REQ_SYNC) + if (opf & REQ_SYNC) rwbs[i++] = 'S'; - if (op & REQ_META) + if (opf & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; From ba91fd01aad28b2290a00518c4cd6eb728b4f06f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:38 -0700 Subject: [PATCH 103/178] block/brd: Use the enum req_op type Improve static type checking by using the enum req_op type for a function argument that represents a request operation. Cc: Christoph Hellwig Cc: Ming Lei Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-13-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/brd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 7b82876af36e..859499cd1ff8 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -256,7 +256,7 @@ static void copy_from_brd(void *dst, struct brd_device *brd, * Process a single bvec of a bio. */ static int brd_do_bvec(struct brd_device *brd, struct page *page, - unsigned int len, unsigned int off, unsigned int op, + unsigned int len, unsigned int off, enum req_op op, sector_t sector) { void *mem; From 9945172a7120790fb8832cfec9557773f69e9d74 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:39 -0700 Subject: [PATCH 104/178] block/drbd: Use the enum req_op and blk_opf_t types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Reviewed-by: Christoph Böhmwalder Cc: Lars Ellenberg Cc: Philipp Reisner Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-14-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_actlog.c | 9 +++++---- drivers/block/drbd/drbd_bitmap.c | 2 +- drivers/block/drbd/drbd_int.h | 6 +++--- drivers/block/drbd/drbd_receiver.c | 11 ++++++----- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index f5bcded3640d..e27478ae579c 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -124,12 +124,13 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b static int _drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, - sector_t sector, int op) + sector_t sector, enum req_op op) { struct bio *bio; /* we do all our meta data IO in aligned 4k blocks. 
*/ const int size = 4096; - int err, op_flags = 0; + int err; + blk_opf_t op_flags = 0; device->md_io.done = 0; device->md_io.error = -ENODEV; @@ -174,7 +175,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, } int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev, - sector_t sector, int op) + sector_t sector, enum req_op op) { int err; D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1); @@ -385,7 +386,7 @@ static int __al_write_transaction(struct drbd_device *device, struct al_transact write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates; rcu_read_unlock(); if (write_al_updates) { - if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) { + if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) { err = -EIO; drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR); } else { diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 9e060e49b3f8..603f6828dd79 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -977,7 +977,7 @@ static void drbd_bm_endio(struct bio *bio) static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local) { struct drbd_device *device = ctx->device; - unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE; + enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE; struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO, &drbd_md_io_bio_set); struct drbd_bitmap *b = device->bitmap; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4d3efaa20b7b..ecb2ecd8d67d 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1495,7 +1495,7 @@ extern int drbd_resync_finished(struct drbd_device *device); extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent); extern void drbd_md_put_buffer(struct drbd_device *device); extern int drbd_md_sync_page_io(struct drbd_device *device, - struct drbd_backing_dev *bdev, sector_t sector, int op); + struct drbd_backing_dev *bdev, sector_t sector, enum req_op op); extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int); extern void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_backing_dev *bdev, unsigned int *done); @@ -1547,8 +1547,8 @@ extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_device *, - struct drbd_peer_request *, const unsigned, - const unsigned, const int); + struct drbd_peer_request *, enum req_op, + blk_opf_t, int); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6762be53f409..caf646dd91ba 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1621,7 +1621,7 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru /* TODO allocate from our own bio_set. 
*/ int drbd_submit_peer_request(struct drbd_device *device, struct drbd_peer_request *peer_req, - const unsigned op, const unsigned op_flags, + const enum req_op op, const blk_opf_t op_flags, const int fault_type) { struct bio *bios = NULL; @@ -2383,14 +2383,14 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co /* see also bio_flags_to_wire() * DRBD_REQ_*, because we need to semantically map the flags to data packet * flags and back. We may replicate to other kernel versions. */ -static unsigned long wire_flags_to_bio_flags(u32 dpf) +static blk_opf_t wire_flags_to_bio_flags(u32 dpf) { return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | (dpf & DP_FUA ? REQ_FUA : 0) | (dpf & DP_FLUSH ? REQ_PREFLUSH : 0); } -static unsigned long wire_flags_to_bio_op(u32 dpf) +static enum req_op wire_flags_to_bio_op(u32 dpf) { if (dpf & DP_ZEROES) return REQ_OP_WRITE_ZEROES; @@ -2543,7 +2543,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * struct drbd_peer_request *peer_req; struct p_data *p = pi->data; u32 peer_seq = be32_to_cpu(p->seq_num); - int op, op_flags; + enum req_op op; + blk_opf_t op_flags; u32 dp_flags; int err, tp; @@ -4951,7 +4952,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac if (get_ldev(device)) { struct drbd_peer_request *peer_req; - const int op = REQ_OP_WRITE_ZEROES; + const enum req_op op = REQ_OP_WRITE_ZEROES; peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector, size, 0, GFP_NOIO); From 86563de87447ad9458fda9d1862c5ba333c8ab2e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:40 -0700 Subject: [PATCH 105/178] block/drbd: Combine two drbd_submit_peer_request() arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Combine the drbd_submit_peer_request() 'op' and 'op_flags' arguments into a single argument. This patch does not change any functionality. Reviewed-by: Christoph Böhmwalder Cc: Lars Ellenberg Cc: Philipp Reisner Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-15-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_int.h | 3 +-- drivers/block/drbd/drbd_receiver.c | 15 +++++++-------- drivers/block/drbd/drbd_worker.c | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index ecb2ecd8d67d..f15f2f041596 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1547,8 +1547,7 @@ extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector, bool throttle_if_app_is_waiting); extern int drbd_submit_peer_request(struct drbd_device *, - struct drbd_peer_request *, enum req_op, - blk_opf_t, int); + struct drbd_peer_request *, blk_opf_t, int); extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, sector_t, unsigned int, diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index caf646dd91ba..af4c7d65490b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1621,8 +1621,7 @@ static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, stru /* TODO allocate from our own bio_set. 
*/ int drbd_submit_peer_request(struct drbd_device *device, struct drbd_peer_request *peer_req, - const enum req_op op, const blk_opf_t op_flags, - const int fault_type) + const blk_opf_t opf, const int fault_type) { struct bio *bios = NULL; struct bio *bio; @@ -1668,8 +1667,7 @@ int drbd_submit_peer_request(struct drbd_device *device, * generated bio, but a bio allocated on behalf of the peer. */ next_bio: - bio = bio_alloc(device->ldev->backing_bdev, nr_pages, op | op_flags, - GFP_NOIO); + bio = bio_alloc(device->ldev->backing_bdev, nr_pages, opf, GFP_NOIO); /* > peer_req->i.sector, unless this is the first bio */ bio->bi_iter.bi_sector = sector; bio->bi_private = peer_req; @@ -2060,7 +2058,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0, + if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, DRBD_FAULT_RS_WR) == 0) return 0; @@ -2682,7 +2680,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * peer_req->flags |= EE_CALL_AL_COMPLETE_IO; } - err = drbd_submit_peer_request(device, peer_req, op, op_flags, + err = drbd_submit_peer_request(device, peer_req, op | op_flags, DRBD_FAULT_DT_WR); if (!err) return 0; @@ -2980,7 +2978,7 @@ submit_for_resync: submit: update_receiver_timing_details(connection, drbd_submit_peer_request); inc_unacked(device); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, + if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, fault_type) == 0) return 0; @@ -4970,7 +4968,8 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac spin_unlock_irq(&device->resource->req_lock); atomic_add(pi->size >> 9, &device->rs_sect_ev); - err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR); + err = drbd_submit_peer_request(device, peer_req, op, + DRBD_FAULT_RS_WR); if (err) { spin_lock_irq(&device->resource->req_lock); diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index af3051dd8912..0bb1a900c2d5 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -405,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, spin_unlock_irq(&device->resource->req_lock); atomic_add(size >> 9, &device->rs_sect_ev); - if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0, + if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, DRBD_FAULT_RS_RD) == 0) return 0; From 23f8ae7148cc32287364741e32b20f37730114aa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:41 -0700 Subject: [PATCH 106/178] block/floppy: Fix a sparse warning Since the type of request.cmd_flags has been changed from u32 into blk_opf_t, use the __force keyword when casting to an integer type to prevent sparse from warning about this cast.
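
A minimal before/after sketch of the warning being silenced (illustrative, not taken from the driver):

	blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC;

	pr_info("flags=%llx\n", (unsigned long long)opf);
		/* sparse: warning: cast from restricted blk_opf_t */
	pr_info("flags=%llx\n", (__force unsigned long long)opf);
		/* clean: __force marks the conversion as intentional */
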
Cc: Denis Efremov Cc: Willy Tarreau Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-16-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/floppy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 491e7205a0db..ccad3d7b3ddd 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2859,7 +2859,7 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx, if (WARN(atomic_read(&usage_count) == 0, "warning: usage count=0, current_req=%p sect=%ld flags=%llx\n", current_req, (long)blk_rq_pos(current_req), - (unsigned long long) current_req->cmd_flags)) + (__force unsigned long long) current_req->cmd_flags)) return BLK_STS_IOERR; if (test_and_set_bit(0, &fdc_busy)) { From 03df83ac9eb77f749bfd84c7d448cb2b90c1196c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:42 -0700 Subject: [PATCH 107/178] block/rnbd: Use blk_opf_t where appropriate Improve static type checking by using the new blk_opf_t type to represent the combination of a request and request flags. Acked-by: Jack Wang Cc: Md. Haris Iqbal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-17-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-proto.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h index bfb08dd434d1..ea7ac8bca63c 100644 --- a/drivers/block/rnbd/rnbd-proto.h +++ b/drivers/block/rnbd/rnbd-proto.h @@ -229,9 +229,9 @@ static inline bool rnbd_flags_supported(u32 flags) return true; } -static inline u32 rnbd_to_bio_flags(u32 rnbd_opf) +static inline blk_opf_t rnbd_to_bio_flags(u32 rnbd_opf) { - u32 bio_opf; + blk_opf_t bio_opf; switch (rnbd_op(rnbd_opf)) { case RNBD_OP_READ: @@ -286,7 +286,8 @@ static inline u32 rq_to_rnbd_flags(struct request *rq) break; default: WARN(1, "Unknown request type %d (flags %llu)\n", - req_op(rq), (unsigned long long)rq->cmd_flags); + (__force u32)req_op(rq), + (__force unsigned long long)rq->cmd_flags); rnbd_opf = 0; } From 6c5412e268340e0d98eade4571658bacb4652176 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:43 -0700 Subject: [PATCH 108/178] xen-blkback: Use the enum req_op and blk_opf_t types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improve static type checking by using the enum req_op type for request operations and the new blk_opf_t type for request flags. Acked-by: Roger Pau Monné Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-18-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/xen-blkback/blkback.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index a97f2bf5b01b..a5cf7f1e871c 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -442,7 +442,7 @@ static void free_req(struct xen_blkif_ring *ring, struct pending_req *req) * Routines for managing virtual block devices (vbds). 
*/ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif, - int operation) + enum req_op operation) { struct xen_vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -1193,8 +1193,8 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, struct bio *bio = NULL; struct bio **biolist = pending_req->biolist; int i, nbio = 0; - int operation; - int operation_flags = 0; + enum req_op operation; + blk_opf_t operation_flags = 0; struct blk_plug plug; bool drain = false; struct grant_page **pages = pending_req->segments; From bc0421ea44b82d2108bcf79e020498c5ff0000af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:44 -0700 Subject: [PATCH 109/178] block/zram: Use enum req_op where appropriate Improve static type checking by using the enum req_op type where appropriate. Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-19-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a35b86c58aa2..4abeb261b833 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1523,7 +1523,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, * Returns 1 if IO request was successfully submitted. */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, unsigned int op, struct bio *bio) + int offset, enum req_op op, struct bio *bio) { int ret; From ba229aa8f2494bb76aa3f0c80e8a6c0023c829d7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:45 -0700 Subject: [PATCH 110/178] nvdimm-btt: Use the enum req_op type Improve static type checking by using the enum req_op type where appropriate. Cc: Dan Williams Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-20-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/nvdimm/btt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index dfbf73145d16..0297b7882e33 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1422,7 +1422,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip, static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip, struct page *page, unsigned int len, unsigned int off, - unsigned int op, sector_t sector) + enum req_op op, sector_t sector) { int ret; From 7ee1de6e2712efabe8e6cab8db5238ed13643dc1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:46 -0700 Subject: [PATCH 111/178] um: Use enum req_op where appropriate Improve static type checking by using type enum req_op instead of int where appropriate. 
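
As a sketch of what the stronger type buys (hypothetical helper, not part of this patch): with 'enum req_op' the compiler's -Wswitch can reason about a switch over the operation, which a plain 'int' cannot express.

	/* Dropping the default case would make -Wswitch list any unhandled
	 * REQ_OP_* values; with 'int' no such checking is possible.
	 */
	static bool ubd_op_has_payload_sketch(enum req_op op)
	{
		switch (op) {
		case REQ_OP_READ:
		case REQ_OP_WRITE:
			return true;
		default:	/* REQ_OP_FLUSH, REQ_OP_DISCARD, ... */
			return false;
		}
	}
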
Cc: Richard Weinberger Cc: Anton Ivanov Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-21-bvanassche@acm.org Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 479b79e11442..eb2d2f0f0bcc 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1262,7 +1262,7 @@ static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, struct req_iterator iter; int i = 0; unsigned long byte_offset = io_req->offset; - int op = req_op(req); + enum req_op op = req_op(req); if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { io_req->io_desc[0].buffer = NULL; @@ -1325,7 +1325,7 @@ static int ubd_submit_request(struct ubd *dev, struct request *req) int segs = 0; struct io_thread_req *io_req; int ret; - int op = req_op(req); + enum req_op op = req_op(req); if (op == REQ_OP_FLUSH) segs = 0; From 581075e4f6475bb97c73ecccf68636a9453a31fd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:47 -0700 Subject: [PATCH 112/178] dm/core: Reduce the size of struct dm_io_request Combine the bi_op and bi_op_flags into the bi_opf member. Use the new blk_opf_t type to improve static type checking. This patch does not change any functionality. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-22-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-bufio.c | 9 +++------ drivers/md/dm-integrity.c | 15 +++++---------- drivers/md/dm-io.c | 10 ++++++---- drivers/md/dm-kcopyd.c | 3 +-- drivers/md/dm-log.c | 6 ++---- drivers/md/dm-raid1.c | 12 +++++------- drivers/md/dm-snap-persistent.c | 3 +-- drivers/md/dm-writecache.c | 12 ++++-------- include/linux/dm-io.h | 4 ++-- 9 files changed, 29 insertions(+), 45 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5ffa1dcf84cf..1b7acda45c78 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -582,8 +582,7 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, { int r; struct dm_io_request io_req = { - .bi_op = rw, - .bi_op_flags = 0, + .bi_opf = rw, .notify.fn = dmio_complete, .notify.context = b, .client = b->c->dm_io, @@ -1341,8 +1340,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers); int dm_bufio_issue_flush(struct dm_bufio_client *c) { struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, @@ -1365,8 +1363,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_issue_flush); int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count) { struct dm_io_request io_req = { - .bi_op = REQ_OP_DISCARD, - .bi_op_flags = REQ_SYNC, + .bi_opf = REQ_OP_DISCARD | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = c->dm_io, diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 148978ad03a8..2ccc103dea1e 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -557,8 +557,7 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) struct dm_io_region io_loc; int r; - io_req.bi_op = op; - io_req.bi_op_flags = op_flags; + io_req.bi_opf = op | op_flags; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = ic->sb; io_req.notify.fn = NULL; @@ -1067,8 +1066,7 @@ static void 
rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); - io_req.bi_op = op; - io_req.bi_op_flags = op_flags; + io_req.bi_opf = op | op_flags; io_req.mem.type = DM_IO_PAGE_LIST; if (ic->journal_io) io_req.mem.ptr.pl = &ic->journal_io[pl_index]; @@ -1188,8 +1186,7 @@ static void copy_from_journal(struct dm_integrity_c *ic, unsigned section, unsig pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); - io_req.bi_op = REQ_OP_WRITE; - io_req.bi_op_flags = 0; + io_req.bi_opf = REQ_OP_WRITE; io_req.mem.type = DM_IO_PAGE_LIST; io_req.mem.ptr.pl = &ic->journal[pl_index]; io_req.mem.offset = pl_offset; @@ -1516,8 +1513,7 @@ static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_dat if (!ic->meta_dev) flush_data = false; if (flush_data) { - fr.io_req.bi_op = REQ_OP_WRITE, - fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + fr.io_req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, fr.io_req.mem.type = DM_IO_KMEM, fr.io_req.mem.ptr.addr = NULL, fr.io_req.notify.fn = flush_notify, @@ -2706,8 +2702,7 @@ next_chunk: if (unlikely(dm_integrity_failed(ic))) goto err; - io_req.bi_op = REQ_OP_READ; - io_req.bi_op_flags = 0; + io_req.bi_opf = REQ_OP_READ; io_req.mem.type = DM_IO_VMA; io_req.mem.ptr.addr = ic->recalc_buffer; io_req.notify.fn = NULL; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index e4b95eaeec8c..0606e00d1817 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -489,7 +489,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp, case DM_IO_VMA: flush_kernel_vmap_range(io_req->mem.ptr.vma, size); - if (io_req->bi_op == REQ_OP_READ) { + if ((io_req->bi_opf & REQ_OP_MASK) == REQ_OP_READ) { dp->vma_invalidate_address = io_req->mem.ptr.vma; dp->vma_invalidate_size = size; } @@ -519,11 +519,13 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_op, io_req->bi_op_flags, &dp, + io_req->bi_opf & REQ_OP_MASK, + io_req->bi_opf & ~REQ_OP_MASK, &dp, sync_error_bits); - return async_io(io_req->client, num_regions, where, io_req->bi_op, - io_req->bi_op_flags, &dp, io_req->notify.fn, + return async_io(io_req->client, num_regions, where, + io_req->bi_opf & REQ_OP_MASK, + io_req->bi_opf & ~REQ_OP_MASK, &dp, io_req->notify.fn, io_req->notify.context); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 37b03ab7e5c9..a99b994e2b62 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -549,8 +549,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_op = job->rw, - .bi_op_flags = 0, + .bi_opf = job->rw, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = 0, diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 0c6620e7b7bf..56ad13f9347b 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -293,8 +293,7 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis static int rw_header(struct log_c *lc, int op) { - lc->io_req.bi_op = op; - lc->io_req.bi_op_flags = 0; + lc->io_req.bi_opf = op; return dm_io(&lc->io_req, 1, &lc->header_location, NULL); } @@ -307,8 +306,7 @@ static int flush_header(struct log_c *lc) .count = 0, }; - lc->io_req.bi_op = REQ_OP_WRITE; - lc->io_req.bi_op_flags = REQ_PREFLUSH; + lc->io_req.bi_opf = REQ_OP_WRITE | 
REQ_PREFLUSH; return dm_io(&lc->io_req, 1, &null_location, NULL); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 8811d484fdd1..06a38dc32025 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -260,8 +260,7 @@ static int mirror_flush(struct dm_target *ti) struct dm_io_region io[MAX_NR_MIRRORS]; struct mirror *m; struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = REQ_PREFLUSH | REQ_SYNC, + .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC, .mem.type = DM_IO_KMEM, .mem.ptr.addr = NULL, .client = ms->io_client, @@ -535,8 +534,7 @@ static void read_async_bio(struct mirror *m, struct bio *bio) { struct dm_io_region io; struct dm_io_request io_req = { - .bi_op = REQ_OP_READ, - .bi_op_flags = 0, + .bi_opf = REQ_OP_READ, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = read_callback, @@ -648,9 +646,9 @@ static void do_write(struct mirror_set *ms, struct bio *bio) unsigned int i; struct dm_io_region io[MAX_NR_MIRRORS], *dest = io; struct mirror *m; + blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH); struct dm_io_request io_req = { - .bi_op = REQ_OP_WRITE, - .bi_op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH), + .bi_opf = REQ_OP_WRITE | op_flags, .mem.type = DM_IO_BIO, .mem.ptr.bio = bio, .notify.fn = write_callback, @@ -659,7 +657,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio) }; if (bio_op(bio) == REQ_OP_DISCARD) { - io_req.bi_op = REQ_OP_DISCARD; + io_req.bi_opf = REQ_OP_DISCARD | op_flags; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = NULL; } diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 3bb5cff5d6fc..eaf969de3d3a 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -235,8 +235,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, .count = ps->store->chunk_size, }; struct dm_io_request io_req = { - .bi_op = op, - .bi_op_flags = op_flags, + .bi_opf = op | op_flags, .mem.type = DM_IO_VMA, .mem.ptr.vma = area, .client = ps->io_client, diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index d74c5a7a0ab4..2b994b3e22a7 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -523,8 +523,7 @@ static void ssd_commit_flushed(struct dm_writecache *wc, bool wait_for_ios) region.sector += wc->start_sector; atomic_inc(&endio.count); - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_SYNC; + req.bi_opf = REQ_OP_WRITE | REQ_SYNC; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map + (size_t)i * BITMAP_GRANULARITY; req.client = wc->dm_io; @@ -562,8 +561,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) region.sector += wc->start_sector; - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_SYNC | REQ_FUA; + req.bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map; req.client = wc->dm_io; @@ -592,8 +590,7 @@ static void writecache_disk_flush(struct dm_writecache *wc, struct dm_dev *dev) region.bdev = dev->bdev; region.sector = 0; region.count = 0; - req.bi_op = REQ_OP_WRITE; - req.bi_op_flags = REQ_PREFLUSH; + req.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; req.mem.type = DM_IO_KMEM; req.mem.ptr.addr = NULL; req.client = wc->dm_io; @@ -981,8 +978,7 @@ static int writecache_read_metadata(struct dm_writecache *wc, sector_t n_sectors region.bdev = wc->ssd_dev->bdev; region.sector = wc->start_sector; region.count = n_sectors; - req.bi_op = REQ_OP_READ; - req.bi_op_flags = REQ_SYNC; + req.bi_opf = 
REQ_OP_READ | REQ_SYNC; req.mem.type = DM_IO_VMA; req.mem.ptr.vma = (char *)wc->memory_map; req.client = wc->dm_io; diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index a52c6580cc9a..8e1c4ab5df04 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -13,6 +13,7 @@ #ifdef __KERNEL__ #include +#include struct dm_io_region { struct block_device *bdev; @@ -57,8 +58,7 @@ struct dm_io_notify { */ struct dm_io_client; struct dm_io_request { - int bi_op; /* REQ_OP */ - int bi_op_flags; /* req_flag_bits */ + blk_opf_t bi_opf; /* Request type and flags */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ From 71f7113d20ae1083e66ce3301f387362184cdd96 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:48 -0700 Subject: [PATCH 113/178] dm/core: Rename kcopyd_job.rw into kcopyd_job.op The member name 'rw' suggests that this member either has the value 'READ' or 'WRITE' and no other values. Since that member can also have the value REQ_OP_WRITE_ZEROES, rename 'rw' into 'op'. This patch does not change any functionality since REQ_OP_READ = READ = 0 and REQ_OP_WRITE = WRITE = 1. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-23-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-kcopyd.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index a99b994e2b62..9c8f3544e99d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -350,9 +350,9 @@ struct kcopyd_job { unsigned long write_err; /* - * Either READ or WRITE + * REQ_OP_READ, REQ_OP_WRITE or REQ_OP_WRITE_ZEROES. */ - int rw; + enum req_op op; struct dm_io_region source; /* @@ -418,7 +418,8 @@ static struct kcopyd_job *pop_io_job(struct list_head *jobs, * constraint and sequential writes that are at the right position.
*/ list_for_each_entry(job, jobs, list) { - if (job->rw == READ || !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { + if (job->op == REQ_OP_READ || + !(job->flags & BIT(DM_KCOPYD_WRITE_SEQ))) { list_del(&job->list); return job; } @@ -518,7 +519,7 @@ static void complete_io(unsigned long error, void *context) io_job_finish(kc->throttle); if (error) { - if (op_is_write(job->rw)) + if (op_is_write(job->op)) job->write_err |= error; else job->read_err = 1; @@ -530,11 +531,11 @@ static void complete_io(unsigned long error, void *context) } } - if (op_is_write(job->rw)) + if (op_is_write(job->op)) push(&kc->complete_jobs, job); else { - job->rw = WRITE; + job->op = REQ_OP_WRITE; push(&kc->io_jobs, job); } @@ -549,7 +550,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_opf = job->rw, + .bi_opf = job->op, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = 0, @@ -570,7 +571,7 @@ static int run_io_job(struct kcopyd_job *job) io_job_start(job->kc->throttle); - if (job->rw == READ) + if (job->op == REQ_OP_READ) r = dm_io(&io_req, 1, &job->source, NULL); else r = dm_io(&io_req, job->num_dests, job->dests, NULL); @@ -613,7 +614,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (op_is_write(job->rw)) + if (op_is_write(job->op)) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -816,7 +817,7 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, if (from) { job->source = *from; job->pages = NULL; - job->rw = READ; + job->op = REQ_OP_READ; } else { memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; @@ -825,10 +826,10 @@ void dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, /* * Use WRITE ZEROES to optimize zeroing if all dests support it. */ - job->rw = REQ_OP_WRITE_ZEROES; + job->op = REQ_OP_WRITE_ZEROES; for (i = 0; i < job->num_dests; i++) if (!bdev_write_zeroes_sectors(job->dests[i].bdev)) { - job->rw = WRITE; + job->op = REQ_OP_WRITE; break; } } From a3282b432f64e9b88632bd380c90157673dce75b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:49 -0700 Subject: [PATCH 114/178] dm/core: Combine request operation type and flags Improve kernel code uniformity by combining the request operation type and flags into a single variable. Change 'int rw' into 'enum req_op op' because the name 'op' is what is used in the block layer to hold a request type. Use the blk_opf_t and enum req_op types where appropriate to improve static type checking. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-24-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-bufio.c | 19 ++++++++++--------- drivers/md/dm-io.c | 36 +++++++++++++++++------------------- drivers/md/dm.c | 10 +++++----- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 1b7acda45c78..dc01ce33265b 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -577,12 +577,12 @@ static void dmio_complete(unsigned long error, void *context) b->end_io(b, unlikely(error != 0) ? 
BLK_STS_IOERR : 0); } -static void use_dmio(struct dm_buffer *b, int rw, sector_t sector, +static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector, unsigned n_sectors, unsigned offset) { int r; struct dm_io_request io_req = { - .bi_opf = rw, + .bi_opf = op, .notify.fn = dmio_complete, .notify.context = b, .client = b->c->dm_io, @@ -615,7 +615,7 @@ static void bio_complete(struct bio *bio) b->end_io(b, status); } -static void use_bio(struct dm_buffer *b, int rw, sector_t sector, +static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector, unsigned n_sectors, unsigned offset) { struct bio *bio; @@ -629,10 +629,10 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector, bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN); if (!bio) { dmio: - use_dmio(b, rw, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset); return; } - bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, rw); + bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, op); bio->bi_iter.bi_sector = sector; bio->bi_end_io = bio_complete; bio->bi_private = b; @@ -668,7 +668,8 @@ static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block return sector; } -static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t)) +static void submit_io(struct dm_buffer *b, enum req_op op, + void (*end_io)(struct dm_buffer *, blk_status_t)) { unsigned n_sectors; sector_t sector; @@ -678,7 +679,7 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff sector = block_to_sector(b->c, b->block); - if (rw != REQ_OP_WRITE) { + if (op != REQ_OP_WRITE) { n_sectors = b->c->block_size >> SECTOR_SHIFT; offset = 0; } else { @@ -697,9 +698,9 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff } if (b->data_mode != DATA_MODE_VMALLOC) - use_bio(b, rw, sector, n_sectors, offset); + use_bio(b, op, sector, n_sectors, offset); else - use_dmio(b, rw, sector, n_sectors, offset); + use_dmio(b, op, sector, n_sectors, offset); } /*---------------------------------------------------------------- diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 0606e00d1817..783564533459 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -293,7 +293,7 @@ static void km_dp_init(struct dpages *dp, void *data) /*----------------------------------------------------------------- * IO routines that accept a list of pages. *---------------------------------------------------------------*/ -static void do_region(int op, int op_flags, unsigned region, +static void do_region(const blk_opf_t opf, unsigned region, struct dm_io_region *where, struct dpages *dp, struct io *io) { @@ -306,6 +306,7 @@ static void do_region(int op, int op_flags, unsigned region, struct request_queue *q = bdev_get_queue(where->bdev); sector_t num_sectors; unsigned int special_cmd_max_sectors; + const enum req_op op = opf & REQ_OP_MASK; /* * Reject unsupported discard and write same requests. 
@@ -339,8 +340,8 @@ static void do_region(int op, int op_flags, unsigned region, (PAGE_SIZE >> SECTOR_SHIFT))); } - bio = bio_alloc_bioset(where->bdev, num_bvecs, op | op_flags, - GFP_NOIO, &io->client->bios); + bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO, + &io->client->bios); bio->bi_iter.bi_sector = where->sector + (where->count - remaining); bio->bi_end_io = endio; store_io_and_region_in_bio(bio, io, region); @@ -368,7 +369,7 @@ static void do_region(int op, int op_flags, unsigned region, } while (remaining); } -static void dispatch_io(int op, int op_flags, unsigned int num_regions, +static void dispatch_io(blk_opf_t opf, unsigned int num_regions, struct dm_io_region *where, struct dpages *dp, struct io *io, int sync) { @@ -378,7 +379,7 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions, BUG_ON(num_regions > DM_IO_MAX_REGIONS); if (sync) - op_flags |= REQ_SYNC; + opf |= REQ_SYNC; /* * For multiple regions we need to be careful to rewind @@ -386,8 +387,8 @@ static void dispatch_io(int op, int op_flags, unsigned int num_regions, */ for (i = 0; i < num_regions; i++) { *dp = old_pages; - if (where[i].count || (op_flags & REQ_PREFLUSH)) - do_region(op, op_flags, i, where + i, dp, io); + if (where[i].count || (opf & REQ_PREFLUSH)) + do_region(opf, i, where + i, dp, io); } /* @@ -411,13 +412,13 @@ static void sync_io_complete(unsigned long error, void *context) } static int sync_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int op, int op_flags, - struct dpages *dp, unsigned long *error_bits) + struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, + unsigned long *error_bits) { struct io *io; struct sync_io sio; - if (num_regions > 1 && !op_is_write(op)) { + if (num_regions > 1 && !op_is_write(opf)) { WARN_ON(1); return -EIO; } @@ -434,7 +435,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(op, op_flags, num_regions, where, dp, io, 1); + dispatch_io(opf, num_regions, where, dp, io, 1); wait_for_completion_io(&sio.wait); @@ -445,12 +446,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct dm_io_region *where, int op, int op_flags, + struct dm_io_region *where, blk_opf_t opf, struct dpages *dp, io_notify_fn fn, void *context) { struct io *io; - if (num_regions > 1 && !op_is_write(op)) { + if (num_regions > 1 && !op_is_write(opf)) { WARN_ON(1); fn(1, context); return -EIO; @@ -466,7 +467,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io->vma_invalidate_address = dp->vma_invalidate_address; io->vma_invalidate_size = dp->vma_invalidate_size; - dispatch_io(op, op_flags, num_regions, where, dp, io, 0); + dispatch_io(opf, num_regions, where, dp, io, 0); return 0; } @@ -519,13 +520,10 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions, if (!io_req->notify.fn) return sync_io(io_req->client, num_regions, where, - io_req->bi_opf & REQ_OP_MASK, - io_req->bi_opf & ~REQ_OP_MASK, &dp, - sync_error_bits); + io_req->bi_opf, &dp, sync_error_bits); return async_io(io_req->client, num_regions, where, - io_req->bi_opf & REQ_OP_MASK, - io_req->bi_opf & ~REQ_OP_MASK, &dp, io_req->notify.fn, + io_req->bi_opf, &dp, io_req->notify.fn, io_req->notify.context); } EXPORT_SYMBOL(dm_io); diff --git a/drivers/md/dm.c 
b/drivers/md/dm.c index 6c21922b87d0..54c2a23f4e55 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -716,7 +716,7 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) } static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, - int *srcu_idx, unsigned bio_opf) + int *srcu_idx, blk_opf_t bio_opf) { if (bio_opf & REQ_NOWAIT) return dm_get_live_table_fast(md); @@ -725,7 +725,7 @@ static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md, } static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx, - unsigned bio_opf) + blk_opf_t bio_opf) { if (bio_opf & REQ_NOWAIT) dm_put_live_table_fast(md); @@ -1511,7 +1511,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target static bool is_abnormal_io(struct bio *bio) { - unsigned int op = bio_op(bio); + enum req_op op = bio_op(bio); if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) { switch (op) { @@ -1625,7 +1625,7 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci) * Only support bio polling for normal IO, and the target io is * exactly inside the dm_io instance (verified in dm_poll_dm_io) */ - ci->submit_as_polled = ci->bio->bi_opf & REQ_POLLED; + ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED); len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count); setup_split_accounting(ci, len); @@ -1722,7 +1722,7 @@ static void dm_submit_bio(struct bio *bio) struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; int srcu_idx; struct dm_table *map; - unsigned bio_opf = bio->bi_opf; + blk_opf_t bio_opf = bio->bi_opf; map = dm_get_live_table_bio(md, &srcu_idx, bio_opf); From 67a7b9a5b54fa3a1b9e4ab5b9808198680cba082 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:50 -0700 Subject: [PATCH 115/178] dm/ebs: Change 'int rw' into 'enum req_op op' Improve static type checking by using type 'enum req_op' instead of 'int'. Make the role of the 'rw' arguments more clear by renaming these into 'op' (operation). This patch does not change any functionality since REQ_OP_READ = READ = 0 and REQ_OP_WRITE = WRITE = 1. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Heinz Mauelshagen Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-25-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-ebs-target.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c index 0221fa63f888..223e8e1a7a13 100644 --- a/drivers/md/dm-ebs-target.c +++ b/drivers/md/dm-ebs-target.c @@ -61,7 +61,8 @@ static inline bool __ebs_check_bs(unsigned int bs) * * copy blocks between bufio blocks and bio vector's (partial/overlapping) pages. */ -static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bvec_iter *iter) +static int __ebs_rw_bvec(struct ebs_c *ec, enum req_op op, struct bio_vec *bv, + struct bvec_iter *iter) { int r = 0; unsigned char *ba, *pa; @@ -81,7 +82,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv cur_len = min(dm_bufio_get_block_size(ec->bufio) - buf_off, bv_len); /* Avoid reading for writes in case bio vector's page overwrites block completely. 
*/ - if (rw == READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio)) + if (op == REQ_OP_READ || buf_off || bv_len < dm_bufio_get_block_size(ec->bufio)) ba = dm_bufio_read(ec->bufio, block, &b); else ba = dm_bufio_new(ec->bufio, block, &b); @@ -95,7 +96,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv } else { /* Copy data to/from bio to buffer if read/new was successful above. */ ba += buf_off; - if (rw == READ) { + if (op == REQ_OP_READ) { memcpy(pa, ba, cur_len); flush_dcache_page(bv->bv_page); } else { @@ -117,14 +118,14 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv } /* READ/WRITE: iterate bio vector's copying between (partial) pages and bufio blocks. */ -static int __ebs_rw_bio(struct ebs_c *ec, int rw, struct bio *bio) +static int __ebs_rw_bio(struct ebs_c *ec, enum req_op op, struct bio *bio) { int r = 0, rr; struct bio_vec bv; struct bvec_iter iter; bio_for_each_bvec(bv, bio, iter) { - rr = __ebs_rw_bvec(ec, rw, &bv, &iter); + rr = __ebs_rw_bvec(ec, op, &bv, &iter); if (rr) r = rr; } @@ -205,10 +206,10 @@ static void __ebs_process_bios(struct work_struct *ws) bio_list_for_each(bio, &bios) { r = -EIO; if (bio_op(bio) == REQ_OP_READ) - r = __ebs_rw_bio(ec, READ, bio); + r = __ebs_rw_bio(ec, REQ_OP_READ, bio); else if (bio_op(bio) == REQ_OP_WRITE) { write = true; - r = __ebs_rw_bio(ec, WRITE, bio); + r = __ebs_rw_bio(ec, REQ_OP_WRITE, bio); } else if (bio_op(bio) == REQ_OP_DISCARD) { __ebs_forget_bio(ec, bio); r = __ebs_discard_bio(ec, bio); From eff17e5161feda42c64b1402e86724649927bcde Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:51 -0700 Subject: [PATCH 116/178] dm/dm-flakey: Use the new blk_opf_t type Use the new blk_opf_t type for structure members that represent request flags. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Josef Bacik Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-26-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-flakey.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index f2305eb758a2..89fa7a68c6c4 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -32,7 +32,7 @@ struct flakey_c { unsigned corrupt_bio_byte; unsigned corrupt_bio_rw; unsigned corrupt_bio_value; - unsigned corrupt_bio_flags; + blk_opf_t corrupt_bio_flags; }; enum feature_flag_bits { @@ -145,7 +145,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, /* * Only corrupt bios with these flags set. */ - r = dm_read_arg(_args + 3, as, &fc->corrupt_bio_flags, &ti->error); + BUILD_BUG_ON(sizeof(fc->corrupt_bio_flags) != + sizeof(unsigned int)); + r = dm_read_arg(_args + 3, as, + (__force unsigned *)&fc->corrupt_bio_flags, + &ti->error); if (r) return r; argc--; From c9154a4cb8dc6a1bca4158174fedecf98de7580d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:52 -0700 Subject: [PATCH 117/178] dm/dm-integrity: Combine request operation and flags Combine the request operation type and request flags into a single argument. Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. 
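To illustrate the pattern in isolation (this sketch is not part of the patch; it assumes only the blk_opf_t, enum req_op and REQ_OP_MASK definitions from <linux/blk_types.h>, and show_opf_split() is a made-up name):

    /*
     * A combined value is built with a bitwise OR, e.g.
     * REQ_OP_WRITE | REQ_FUA, and taken apart again with
     * REQ_OP_MASK: the low bits select the operation, the
     * remaining bits carry the modifier flags.
     */
    static void show_opf_split(blk_opf_t opf)
    {
            const enum req_op op = opf & REQ_OP_MASK;
            const blk_opf_t flags = opf & ~REQ_OP_MASK;

            if (op == REQ_OP_WRITE && (flags & REQ_FUA))
                    pr_debug("FUA write\n");
    }

This is the idiom sync_rw_sb() follows below: callers pass e.g. REQ_OP_WRITE | REQ_FUA as a single argument, and the function recovers the bare operation with opf & REQ_OP_MASK where it needs it.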
Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Eric Biggers Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-27-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-integrity.c | 63 +++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 2ccc103dea1e..c60f9b2ece2d 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -551,13 +551,14 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr) return 0; } -static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags) +static int sync_rw_sb(struct dm_integrity_c *ic, blk_opf_t opf) { struct dm_io_request io_req; struct dm_io_region io_loc; + const enum req_op op = opf & REQ_OP_MASK; int r; - io_req.bi_opf = op | op_flags; + io_req.bi_opf = opf; io_req.mem.type = DM_IO_KMEM; io_req.mem.ptr.addr = ic->sb; io_req.notify.fn = NULL; @@ -1049,8 +1050,9 @@ static void complete_journal_io(unsigned long error, void *context) complete_journal_op(comp); } -static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, - unsigned sector, unsigned n_sectors, struct journal_completion *comp) +static void rw_journal_sectors(struct dm_integrity_c *ic, blk_opf_t opf, + unsigned sector, unsigned n_sectors, + struct journal_completion *comp) { struct dm_io_request io_req; struct dm_io_region io_loc; @@ -1066,7 +1068,7 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT); pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1); - io_req.bi_opf = op | op_flags; + io_req.bi_opf = opf; io_req.mem.type = DM_IO_PAGE_LIST; if (ic->journal_io) io_req.mem.ptr.pl = &ic->journal_io[pl_index]; @@ -1086,7 +1088,8 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, r = dm_io(&io_req, 1, &io_loc, NULL); if (unlikely(r)) { - dm_integrity_io_error(ic, op == REQ_OP_READ ? "reading journal" : "writing journal", r); + dm_integrity_io_error(ic, (opf & REQ_OP_MASK) == REQ_OP_READ ? 
+ "reading journal" : "writing journal", r); if (comp) { WARN_ONCE(1, "asynchronous dm_io failed: %d", r); complete_journal_io(-1UL, comp); @@ -1094,15 +1097,16 @@ static void rw_journal_sectors(struct dm_integrity_c *ic, int op, int op_flags, } } -static void rw_journal(struct dm_integrity_c *ic, int op, int op_flags, unsigned section, - unsigned n_sections, struct journal_completion *comp) +static void rw_journal(struct dm_integrity_c *ic, blk_opf_t opf, + unsigned section, unsigned n_sections, + struct journal_completion *comp) { unsigned sector, n_sectors; sector = section * ic->journal_section_sectors; n_sectors = n_sections * ic->journal_section_sectors; - rw_journal_sectors(ic, op, op_flags, sector, n_sectors, comp); + rw_journal_sectors(ic, opf, sector, n_sectors, comp); } static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsigned commit_sections) @@ -1127,7 +1131,7 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi for (i = 0; i < commit_sections; i++) rw_section_mac(ic, commit_start + i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, commit_start, + rw_journal(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, commit_start, commit_sections, &io_comp); } else { unsigned to_end; @@ -1139,7 +1143,8 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, commit_start, to_end, &crypt_comp_1); if (try_wait_for_completion(&crypt_comp_1.comp)) { - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, + commit_start, to_end, &io_comp); reinit_completion(&crypt_comp_1.comp); crypt_comp_1.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_1); @@ -1150,17 +1155,17 @@ static void write_journal(struct dm_integrity_c *ic, unsigned commit_start, unsi crypt_comp_2.in_flight = (atomic_t)ATOMIC_INIT(0); encrypt_journal(ic, true, 0, commit_sections - to_end, &crypt_comp_2); wait_for_completion_io(&crypt_comp_1.comp); - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); wait_for_completion_io(&crypt_comp_2.comp); } } else { for (i = 0; i < to_end; i++) rw_section_mac(ic, commit_start + i, true); - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, commit_start, to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, commit_start, to_end, &io_comp); for (i = 0; i < commit_sections - to_end; i++) rw_section_mac(ic, i, true); } - rw_journal(ic, REQ_OP_WRITE, REQ_FUA, 0, commit_sections - to_end, &io_comp); + rw_journal(ic, REQ_OP_WRITE | REQ_FUA, 0, commit_sections - to_end, &io_comp); } wait_for_completion_io(&io_comp.comp); @@ -2622,7 +2627,7 @@ static void recalc_write_super(struct dm_integrity_c *ic) if (dm_integrity_failed(ic)) return; - r = sync_rw_sb(ic, REQ_OP_WRITE, 0); + r = sync_rw_sb(ic, REQ_OP_WRITE); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } @@ -2795,7 +2800,7 @@ static void bitmap_block_work(struct work_struct *w) if (bio_list_empty(&waiting)) return; - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL); @@ -2841,7 +2846,7 @@ static void bitmap_flush_work(struct work_struct *work) block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR); 
block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); spin_lock_irq(&ic->endio_wait.lock); @@ -2913,7 +2918,7 @@ static void replay_journal(struct dm_integrity_c *ic) if (!ic->just_formatted) { DEBUG_print("reading journal\n"); - rw_journal(ic, REQ_OP_READ, 0, 0, ic->journal_sections, NULL); + rw_journal(ic, REQ_OP_READ, 0, ic->journal_sections, NULL); if (ic->journal_io) DEBUG_bytes(lowmem_page_address(ic->journal_io[0].page), 64, "read journal"); if (ic->journal_io) { @@ -3108,7 +3113,7 @@ static void dm_integrity_postsuspend(struct dm_target *ti) /* set to 0 to test bitmap replay code */ init_journal(ic, 0, ic->journal_sections, 0); ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); #endif @@ -3131,23 +3136,23 @@ static void dm_integrity_resume(struct dm_target *ti) if (ic->provided_data_sectors > old_provided_data_sectors && ic->mode == 'B' && ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) { - rw_journal_sectors(ic, REQ_OP_READ, 0, 0, + rw_journal_sectors(ic, REQ_OP_READ, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); block_bitmap_op(ic, ic->journal, old_provided_data_sectors, ic->provided_data_sectors - old_provided_data_sectors, BITMAP_OP_SET); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } ic->sb->provided_data_sectors = cpu_to_le64(ic->provided_data_sectors); - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) { DEBUG_print("resume dirty_bitmap\n"); - rw_journal_sectors(ic, REQ_OP_READ, 0, 0, + rw_journal_sectors(ic, REQ_OP_READ, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); if (ic->mode == 'B') { if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit && @@ -3166,7 +3171,7 @@ static void dm_integrity_resume(struct dm_target *ti) block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET); block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET); - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); ic->sb->recalc_sector = cpu_to_le64(0); @@ -3182,7 +3187,7 @@ static void dm_integrity_resume(struct dm_target *ti) replay_journal(ic); ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP); } - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); } else { @@ -3194,7 +3199,7 @@ static void dm_integrity_resume(struct dm_target *ti) if (ic->mode == 'B') { ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP); ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit; - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + 
r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (unlikely(r)) dm_integrity_io_error(ic, "writing superblock", r); @@ -3210,7 +3215,7 @@ static void dm_integrity_resume(struct dm_target *ti) block_bitmap_op(ic, ic->may_write_bitmap, le64_to_cpu(ic->sb->recalc_sector), ic->provided_data_sectors - le64_to_cpu(ic->sb->recalc_sector), BITMAP_OP_SET); } - rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, + rw_journal_sectors(ic, REQ_OP_WRITE | REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL); } } @@ -4251,7 +4256,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - r = sync_rw_sb(ic, REQ_OP_READ, 0); + r = sync_rw_sb(ic, REQ_OP_READ); if (r) { ti->error = "Error reading superblock"; goto bad; @@ -4495,7 +4500,7 @@ try_smaller_buffer: ti->error = "Error initializing journal"; goto bad; } - r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA); + r = sync_rw_sb(ic, REQ_OP_WRITE | REQ_FUA); if (r) { ti->error = "Error initializing superblock"; goto bad; From c1389b33332ee09e8981a21a8abb812d93ca253f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:53 -0700 Subject: [PATCH 118/178] dm mirror log: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for a function argument that represents a request operation type. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-28-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 56ad13f9347b..cf10fa667797 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -291,7 +291,7 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis core->nr_regions = le64_to_cpu(disk->nr_regions); } -static int rw_header(struct log_c *lc, int op) +static int rw_header(struct log_c *lc, enum req_op op) { lc->io_req.bi_opf = op; From 6b9901395702c34c3ef0fe63573fcf69192244ea Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:54 -0700 Subject: [PATCH 119/178] dm-snap: Combine request operation type and flags Pass the request operation and its flags as a single argument to improve kernel code uniformity. Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Mikulas Patocka Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-29-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-snap-persistent.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index eaf969de3d3a..f46f930eedf9 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -226,8 +226,8 @@ static void do_metadata(struct work_struct *work) /* * Read or write a chunk aligned and sized block of data from a device. 
*/ -static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, - int op_flags, int metadata) +static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf, + int metadata) { struct dm_io_region where = { .bdev = dm_snap_cow(ps->store->snap)->bdev, @@ -235,7 +235,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, .count = ps->store->chunk_size, }; struct dm_io_request io_req = { - .bi_opf = op | op_flags, + .bi_opf = opf, .mem.type = DM_IO_VMA, .mem.ptr.vma = area, .client = ps->io_client, @@ -281,11 +281,11 @@ static void skip_metadata(struct pstore *ps) * Read or write a metadata area. Remembering to skip the first * chunk which holds the header. */ -static int area_io(struct pstore *ps, int op, int op_flags) +static int area_io(struct pstore *ps, blk_opf_t opf) { chunk_t chunk = area_location(ps, ps->current_area); - return chunk_io(ps, ps->area, chunk, op, op_flags, 0); + return chunk_io(ps, ps->area, chunk, opf, 0); } static void zero_memory_area(struct pstore *ps) @@ -296,7 +296,7 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { return chunk_io(ps, ps->zero_area, area_location(ps, area), - REQ_OP_WRITE, 0, 0); + REQ_OP_WRITE, 0); } static int read_header(struct pstore *ps, int *new_snapshot) @@ -328,7 +328,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) if (r) return r; - r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1); + r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 1); if (r) goto bad; @@ -389,7 +389,7 @@ static int write_header(struct pstore *ps) dh->version = cpu_to_le32(ps->version); dh->chunk_size = cpu_to_le32(ps->store->chunk_size); - return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1); + return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 1); } /* @@ -733,8 +733,8 @@ static void persistent_commit_exception(struct dm_exception_store *store, /* * Commit exceptions to disk. */ - if (ps->valid && area_io(ps, REQ_OP_WRITE, - REQ_PREFLUSH | REQ_FUA | REQ_SYNC)) + if (ps->valid && area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | + REQ_SYNC)) ps->valid = 0; /* @@ -774,7 +774,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store, return 0; ps->current_area--; - r = area_io(ps, REQ_OP_READ, 0); + r = area_io(ps, REQ_OP_READ); if (r < 0) return r; ps->current_committed = ps->exceptions_per_area; @@ -811,7 +811,7 @@ static int persistent_commit_merge(struct dm_exception_store *store, for (i = 0; i < nr_merged; i++) clear_exception(ps, ps->current_committed - 1 - i); - r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); + r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA); if (r < 0) return r; From 8a5a7ce8774ce9d2fb52df6ecb0d234cf76811d1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:55 -0700 Subject: [PATCH 120/178] dm/zone: Use the enum req_op type Use the enum req_op type for request operations instead of unsigned int. This patch fixes a sparse warning that has been introduced by making enum req_op __bitwise. 
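For context, the warning comes from storing a __bitwise value in a plain integer field. A minimal sketch of the before/after (save_bio_details() is an illustrative name, not a function in this patch):

    struct orig_bio_details {
            enum req_op op;         /* was: unsigned int op; */
            unsigned int nr_sectors;
    };

    static void save_bio_details(struct orig_bio_details *d, struct bio *bio)
    {
            /* bio_op() returns enum req_op, so sparse no longer
             * warns about an implicit __bitwise-to-int conversion. */
            d->op = bio_op(bio);
            d->nr_sectors = bio_sectors(bio);
    }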
Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-30-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-zone.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c index 2b89cde30c9e..4d10f302c62e 100644 --- a/drivers/md/dm-zone.c +++ b/drivers/md/dm-zone.c @@ -359,7 +359,7 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno, } struct orig_bio_details { - unsigned int op; + enum req_op op; unsigned int nr_sectors; }; From 13a1f650b6ec935834977461b87585f6387257b4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:56 -0700 Subject: [PATCH 121/178] dm/dm-zoned: Use the enum req_op type Improve static type checking by using the enum req_op type for arguments that represent a request operation. Reviewed-by: Damien Le Moal Cc: Alasdair Kergon Cc: Mike Snitzer Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-31-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-zoned-metadata.c | 5 +++-- drivers/md/dm-zoned.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index d1ea66114d14..34db364c23a8 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -737,7 +737,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, /* * Read/write a metadata block. */ -static int dmz_rdwr_block(struct dmz_dev *dev, int op, +static int dmz_rdwr_block(struct dmz_dev *dev, enum req_op op, sector_t block, struct page *page) { struct bio *bio; @@ -2045,7 +2045,8 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, * allocated and used to map the chunk. * The zone returned will be set to the active state. */ -struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op) +struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, + unsigned int chunk, enum req_op op) { struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT]; struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index a02744a0846c..265494d3f711 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -248,7 +248,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int dev_idx, bool idle); struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, - unsigned int chunk, int op); + unsigned int chunk, enum req_op op); void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *zone); struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, struct dm_zone *dzone); From 4ce4c73f662bdb0ae5bfb058bc7ec6f6829ca078 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:57 -0700 Subject: [PATCH 122/178] md/core: Combine two sync_page_io() arguments Improve uniformity in the kernel of handling of request operation and flags by passing these as a single argument. 
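Illustrative call sites showing the signature change (sketches of the pattern, not additional hunks):

    /* Before: operation and flags were separate arguments. */
    sync_page_io(rdev, sector, size, page, REQ_OP_READ, 0, true);

    /* After: a single blk_opf_t argument; flags are OR-ed in as needed. */
    sync_page_io(rdev, sector, size, page, REQ_OP_READ, true);
    sync_page_io(rdev, sector, size, page,
                 REQ_OP_WRITE | REQ_SYNC | REQ_FUA, true);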
Cc: Song Liu Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-32-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/dm-raid.c | 2 +- drivers/md/md-bitmap.c | 2 +- drivers/md/md.c | 10 +++++----- drivers/md/md.h | 3 +-- drivers/md/raid1.c | 8 ++++---- drivers/md/raid10.c | 10 +++++----- drivers/md/raid5-cache.c | 12 ++++++------ drivers/md/raid5-ppl.c | 12 ++++++------ 8 files changed, 29 insertions(+), 30 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9526ccbedafb..fdd6616632c9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2036,7 +2036,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload) rdev->sb_loaded = 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) { + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true)) { DMERR("Failed to read superblock of device at position %d", rdev->raid_disk); md_error(rdev->mddev, rdev); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index d87f674ab762..0a21b8317103 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -165,7 +165,7 @@ static int read_sb_page(struct mddev *mddev, loff_t offset, if (sync_page_io(rdev, target, roundup(size, bdev_logical_block_size(rdev->bdev)), - page, REQ_OP_READ, 0, true)) { + page, REQ_OP_READ, true)) { page->index = index; return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 4be9d8173071..4df78e30b76a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -993,15 +993,15 @@ int md_super_wait(struct mddev *mddev) } int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int op, int op_flags, bool metadata_op) + struct page *page, blk_opf_t opf, bool metadata_op) { struct bio bio; struct bio_vec bvec; if (metadata_op && rdev->meta_bdev) - bio_init(&bio, rdev->meta_bdev, &bvec, 1, op | op_flags); + bio_init(&bio, rdev->meta_bdev, &bvec, 1, opf); else - bio_init(&bio, rdev->bdev, &bvec, 1, op | op_flags); + bio_init(&bio, rdev->bdev, &bvec, 1, opf); if (metadata_op) bio.bi_iter.bi_sector = sector + rdev->sb_start; @@ -1024,7 +1024,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size) if (rdev->sb_loaded) return 0; - if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) + if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, true)) goto fail; rdev->sb_loaded = 1; return 0; @@ -1722,7 +1722,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return -EINVAL; bb_sector = (long long)offset; if (!sync_page_io(rdev, bb_sector, sectors << 9, - rdev->bb_page, REQ_OP_READ, 0, true)) + rdev->bb_page, REQ_OP_READ, true)) return -EIO; bbp = (__le64 *)page_address(rdev->bb_page); rdev->badblocks.shift = sb->bblog_shift; diff --git a/drivers/md/md.h b/drivers/md/md.h index cf2cbb17acbd..b4f84b27bdef 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -738,8 +738,7 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, sector_t sector, int size, struct page *page); extern int md_super_wait(struct mddev *mddev); extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, - struct page *page, int op, int op_flags, - bool metadata_op); + struct page *page, blk_opf_t opf, bool metadata_op); extern void md_do_sync(struct md_thread *thread); extern void md_new_event(void); extern void md_allow_write(struct mddev *mddev); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 65cd90f0b2a8..8f1a2e4a6e50 100644 --- 
a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1988,9 +1988,9 @@ static void end_sync_write(struct bio *bio) } static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector, - int sectors, struct page *page, int rw) + int sectors, struct page *page, int rw) { - if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) /* success */ return 1; if (rw == WRITE) { @@ -2057,7 +2057,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) rdev = conf->mirrors[d].rdev; if (sync_page_io(rdev, sect, s<<9, pages[idx], - REQ_OP_READ, 0, false)) { + REQ_OP_READ, false)) { success = 1; break; } @@ -2305,7 +2305,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, atomic_inc(&rdev->nr_pending); rcu_read_unlock(); if (sync_page_io(rdev, sect, s<<9, - conf->tmppage, REQ_OP_READ, 0, false)) + conf->tmppage, REQ_OP_READ, false)) success = 1; rdev_dec_pending(rdev, mddev); if (success) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a7dcb1bf6b0a..3b80120cba30 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2512,7 +2512,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, pages[idx], - REQ_OP_READ, 0, false); + REQ_OP_READ, false); if (ok) { rdev = conf->mirrors[dw].rdev; addr = r10_bio->devs[1].addr + sect; @@ -2520,7 +2520,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio) addr, s << 9, pages[idx], - REQ_OP_WRITE, 0, false); + REQ_OP_WRITE, false); if (!ok) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, @@ -2644,7 +2644,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) return -1; - if (sync_page_io(rdev, sector, sectors << 9, page, rw, 0, false)) + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) /* success */ return 1; if (rw == WRITE) { @@ -2726,7 +2726,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 sect, s<<9, conf->tmppage, - REQ_OP_READ, 0, false); + REQ_OP_READ, false); rdev_dec_pending(rdev, mddev); rcu_read_lock(); if (success) @@ -5107,7 +5107,7 @@ static int handle_reshape_read_error(struct mddev *mddev, addr, s << 9, pages[idx], - REQ_OP_READ, 0, false); + REQ_OP_READ, false); rdev_dec_pending(rdev, mddev); rcu_read_lock(); if (success) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 83c184eddbda..6f2dd73128b0 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1788,7 +1788,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos, mb = page_address(page); mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); - if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE, + if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false)) { __free_page(page); return -EIO; @@ -1898,7 +1898,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf, atomic_inc(&rdev->nr_pending); rcu_read_unlock(); sync_page_io(rdev, sh->sector, PAGE_SIZE, - sh->dev[disk_index].page, REQ_OP_WRITE, 0, + sh->dev[disk_index].page, REQ_OP_WRITE, false); rdev_dec_pending(rdev, rdev->mddev); rcu_read_lock(); @@ -1908,7 +1908,7 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf, atomic_inc(&rrdev->nr_pending); rcu_read_unlock(); sync_page_io(rrdev, sh->sector, PAGE_SIZE, - sh->dev[disk_index].page, REQ_OP_WRITE, 0, + 
sh->dev[disk_index].page, REQ_OP_WRITE, false); rdev_dec_pending(rrdev, rrdev->mddev); rcu_read_lock(); @@ -2394,7 +2394,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, PAGE_SIZE)); kunmap_atomic(addr); sync_page_io(log->rdev, write_pos, PAGE_SIZE, - dev->page, REQ_OP_WRITE, 0, false); + dev->page, REQ_OP_WRITE, false); write_pos = r5l_ring_add(log, write_pos, BLOCK_SECTORS); offset += sizeof(__le32) + @@ -2406,7 +2406,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log, mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum, mb, PAGE_SIZE)); sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page, - REQ_OP_WRITE, REQ_SYNC | REQ_FUA, false); + REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false); sh->log_start = ctx->pos; list_add_tail(&sh->r5c, &log->stripe_in_journal_list); atomic_inc(&log->stripe_in_journal_count); @@ -2971,7 +2971,7 @@ static int r5l_load_log(struct r5l_log *log) if (!page) return -ENOMEM; - if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, 0, false)) { + if (!sync_page_io(rdev, cp, PAGE_SIZE, page, REQ_OP_READ, false)) { ret = -EIO; goto ioerr; } diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 0a2e4806b1ec..98988cb26295 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -897,7 +897,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, __func__, indent, "", rdev->bdev, (unsigned long long)sector); if (!sync_page_io(rdev, sector, block_size, page2, - REQ_OP_READ, 0, false)) { + REQ_OP_READ, false)) { md_error(mddev, rdev); pr_debug("%s:%*s read failed!\n", __func__, indent, ""); @@ -919,7 +919,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, (unsigned long long)(ppl_sector + i)); if (!sync_page_io(log->rdev, ppl_sector - log->rdev->data_offset + i, - block_size, page2, REQ_OP_READ, 0, + block_size, page2, REQ_OP_READ, false)) { pr_debug("%s:%*s read failed!\n", __func__, indent, ""); @@ -946,7 +946,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e, (unsigned long long)parity_sector, parity_rdev->bdev); if (!sync_page_io(parity_rdev, parity_sector, block_size, - page1, REQ_OP_WRITE, 0, false)) { + page1, REQ_OP_WRITE, false)) { pr_debug("%s:%*s parity write error!\n", __func__, indent, ""); md_error(mddev, parity_rdev); @@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr, int s = pp_size > PAGE_SIZE ? PAGE_SIZE : pp_size; if (!sync_page_io(rdev, sector - rdev->data_offset, - s, page, REQ_OP_READ, 0, false)) { + s, page, REQ_OP_READ, false)) { md_error(mddev, rdev); ret = -EIO; goto out; @@ -1062,7 +1062,7 @@ static int ppl_write_empty_header(struct ppl_log *log) if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset, PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC | - REQ_FUA, 0, false)) { + REQ_FUA, false)) { md_error(rdev->mddev, rdev); ret = -EIO; } @@ -1100,7 +1100,7 @@ static int ppl_load_distributed(struct ppl_log *log) if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset + pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ, - 0, false)) { + false)) { md_error(mddev, rdev); ret = -EIO; /* if not able to read - don't recover any PPL */ From 9a4fd6a22c64cd7e5555d252ef6c5f2c6dce8ec2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:58 -0700 Subject: [PATCH 123/178] md/bcache: Combine two uuid_io() arguments Improve uniformity in the kernel of handling of request operation and flags by passing these as a single argument. 
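One consequence of passing a combined value is that comparisons against an operation must first mask off the flag bits, as uuid_io() now does with (opf & REQ_OP_MASK) != REQ_OP_WRITE. Spelled out as a sketch (opf_is_write() is illustrative only; the block layer's existing op_is_write() covers the common case):

    static bool opf_is_write(blk_opf_t opf)
    {
            /* Compare only the operation bits; flags such as
             * REQ_SYNC and REQ_META live outside REQ_OP_MASK. */
            return (opf & REQ_OP_MASK) == REQ_OP_WRITE;
    }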
Cc: Coly Li Cc: Mingzhe Zou Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-33-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 9dd752d272f6..a2f61a2225d2 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -414,8 +414,8 @@ static void uuid_io_unlock(struct closure *cl) up(&c->uuid_write_mutex); } -static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, - struct bkey *k, struct closure *parent) +static void uuid_io(struct cache_set *c, blk_opf_t opf, struct bkey *k, + struct closure *parent) { struct closure *cl = &c->uuid_write; struct uuid_entry *u; @@ -429,22 +429,22 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, for (i = 0; i < KEY_PTRS(k); i++) { struct bio *bio = bch_bbio_alloc(c); - bio->bi_opf = REQ_SYNC | REQ_META | op_flags; + bio->bi_opf = opf | REQ_SYNC | REQ_META; bio->bi_iter.bi_size = KEY_SIZE(k) << 9; bio->bi_end_io = uuid_endio; bio->bi_private = cl; - bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, c->uuids); bch_submit_bbio(bio, c, k, i); - if (op != REQ_OP_WRITE) + if ((opf & REQ_OP_MASK) != REQ_OP_WRITE) break; } bch_extent_to_text(buf, sizeof(buf), k); - pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); + pr_debug("%s UUIDs at %s\n", (opf & REQ_OP_MASK) == REQ_OP_WRITE ? + "wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) @@ -463,7 +463,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl) return "bad uuid pointer"; bkey_copy(&c->uuid_bucket, k); - uuid_io(c, REQ_OP_READ, 0, k, cl); + uuid_io(c, REQ_OP_READ, k, cl); if (j->version < BCACHE_JSET_VERSION_UUIDv1) { struct uuid_entry_v0 *u0 = (void *) c->uuids; @@ -511,7 +511,7 @@ static int __uuid_write(struct cache_set *c) size = meta_bucket_pages(&ca->sb) * PAGE_SECTORS; SET_KEY_SIZE(&k.key, size); - uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl); + uuid_io(c, REQ_OP_WRITE, &k.key, &cl); closure_sync(&cl); /* Only one bucket used for uuid write */ From 552eee3b53f661b76e354ab2ba71e2a625cb9722 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:06:59 -0700 Subject: [PATCH 124/178] md/bcache: Combine two prio_io() arguments Improve uniformity in the kernel of handling of request operation and flags by passing these as a single argument. 
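As in the previous patch, having a combined blk_opf_t in hand lets bi_opf be assigned directly instead of going through bio_set_op_attrs(). The shape of the change, as a sketch:

    /* Before: a helper macro combined the operation and flags. */
    bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | op_flags);

    /* After: a plain assignment of the combined value. */
    bio->bi_opf = opf | REQ_SYNC | REQ_META;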
Cc: Coly Li Cc: Mingzhe Zou Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-34-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/bcache/super.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a2f61a2225d2..ba3909bb6bea 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -587,8 +587,7 @@ static void prio_endio(struct bio *bio) closure_put(&ca->prio); } -static void prio_io(struct cache *ca, uint64_t bucket, int op, - unsigned long op_flags) +static void prio_io(struct cache *ca, uint64_t bucket, blk_opf_t opf) { struct closure *cl = &ca->prio; struct bio *bio = bch_bbio_alloc(ca->set); @@ -601,7 +600,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, bio->bi_end_io = prio_endio; bio->bi_private = ca; - bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); + bio->bi_opf = opf | REQ_SYNC | REQ_META; bch_bio_map(bio, ca->disk_buckets); closure_bio_submit(ca->set, bio, &ca->prio); @@ -661,7 +660,7 @@ int bch_prio_write(struct cache *ca, bool wait) BUG_ON(bucket == -1); mutex_unlock(&ca->set->bucket_lock); - prio_io(ca, bucket, REQ_OP_WRITE, 0); + prio_io(ca, bucket, REQ_OP_WRITE); mutex_lock(&ca->set->bucket_lock); ca->prio_buckets[i] = bucket; @@ -705,7 +704,7 @@ static int prio_read(struct cache *ca, uint64_t bucket) ca->prio_last_buckets[bucket_nr] = bucket; bucket_nr++; - prio_io(ca, bucket, REQ_OP_READ, 0); + prio_io(ca, bucket, REQ_OP_READ); if (p->csum != bch_crc64(&p->magic, meta_bucket_bytes(&ca->sb) - 8)) { From 3c5e514db58fdca10ff5e08a5d2e8a4b077300e4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:00 -0700 Subject: [PATCH 125/178] md/raid1: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent request flags. Acked-by: Song Liu Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-35-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/raid1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 8f1a2e4a6e50..05d8438cfec8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1220,8 +1220,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, struct raid1_info *mirror; struct bio *read_bio; struct bitmap *bitmap = mddev->bitmap; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + const enum req_op op = bio_op(bio); + const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; int max_sectors; int rdisk; bool r1bio_existed = !!r1_bio; From cb1802ff82e1ebbbafd860e5a73c26607d72dcd9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:01 -0700 Subject: [PATCH 126/178] md/raid10: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent request flags.
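The raid1/raid10 idiom is to capture individual flag bits from the incoming bio as blk_opf_t values so they can later be OR-ed into a cloned bio. A minimal sketch (the recombination line illustrates how such values are consumed and is not a hunk from this patch):

    const enum req_op op = bio_op(bio);               /* operation only */
    const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; /* REQ_SYNC or 0  */
    const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;   /* REQ_FUA or 0   */

    /* Each do_* variable is either zero or the flag itself, so it
     * can be OR-ed in unconditionally when setting up a clone: */
    clone->bi_opf = op | do_sync | do_fua;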
Acked-by: Song Liu Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-36-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/raid10.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3b80120cba30..26545950ca42 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1136,8 +1136,8 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, { struct r10conf *conf = mddev->private; struct bio *read_bio; - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); + const enum req_op op = bio_op(bio); + const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; int max_sectors; struct md_rdev *rdev; char b[BDEVNAME_SIZE]; @@ -1230,9 +1230,9 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio, struct bio *bio, bool replacement, int n_copy) { - const int op = bio_op(bio); - const unsigned long do_sync = (bio->bi_opf & REQ_SYNC); - const unsigned long do_fua = (bio->bi_opf & REQ_FUA); + const enum req_op op = bio_op(bio); + const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; + const blk_opf_t do_fua = bio->bi_opf & REQ_FUA; unsigned long flags; struct blk_plug_cb *cb; struct raid1_plug_cb *plug = NULL; From a9010741ce7c9533fa825cc49f0739d4d8ebda48 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:02 -0700 Subject: [PATCH 127/178] md/raid5: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Acked-by: Song Liu Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-37-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/raid5.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5d09256d7f81..cae1612580fc 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1082,7 +1082,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) should_defer = conf->batch_bio_dispatch && conf->group_cnt; for (i = disks; i--; ) { - int op, op_flags = 0; + enum req_op op; + blk_opf_t op_flags = 0; int replace_only = 0; struct bio *bi, *rbi; struct md_rdev *rdev, *rrdev = NULL; From f9ed86dc1dc87662145d0327845fde1c6f3db6cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:03 -0700 Subject: [PATCH 128/178] nvme/host: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. 
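For reference, the narrowed return type of nvme_req_op() feeds directly into request allocation. A hypothetical caller (alloc_cmd_rq() is an illustrative name, not part of this patch):

    static struct request *alloc_cmd_rq(struct request_queue *q,
                                        struct nvme_command *cmd)
    {
            /* enum req_op converts implicitly to the blk_opf_t
             * ("operation plus flags") argument expected here. */
            return blk_mq_alloc_request(q, nvme_req_op(cmd), 0);
    }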
Reviewed-by: Sagi Grimberg Cc: Christoph Hellwig Cc: Keith Busch Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-38-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/nvme/host/ioctl.c | 4 ++-- drivers/nvme/host/nvme.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index a2e89db1cd63..27614bee7380 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -68,7 +68,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, void **metap, unsigned timeout, bool vec, - unsigned int rq_flags, blk_mq_req_flags_t blk_flags) + blk_opf_t rq_flags, blk_mq_req_flags_t blk_flags) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -407,7 +407,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_uring_data d; struct nvme_command c; struct request *req; - unsigned int rq_flags = 0; + blk_opf_t rq_flags = 0; blk_mq_req_flags_t blk_flags = 0; void *meta = NULL; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index e4daa57f8bd5..f453e816426a 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -733,7 +733,7 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); void nvme_start_freeze(struct nvme_ctrl *ctrl); -static inline unsigned int nvme_req_op(struct nvme_command *cmd) +static inline enum req_op nvme_req_op(struct nvme_command *cmd) { return nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN; } From a288000f9fe381a21693832275491b9c802921c4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:04 -0700 Subject: [PATCH 129/178] nvme/target: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent a request operation combined with request flags. Rename those variables from 'op' into 'opf'. 
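The 'opf' naming also makes conditional flag construction read naturally, as in nvmet_bdev_execute_rw() below. The shape of that code, extracted as a sketch ('write', 'fua', 'bdev' and 'nr_vecs' stand in for the driver-specific values in the real function):

    blk_opf_t opf;

    if (write) {
            opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
            if (fua)
                    opf |= REQ_FUA;
    } else {
            opf = REQ_OP_READ;
    }

    bio = bio_alloc(bdev, nr_vecs, opf, GFP_KERNEL);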
Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Chaitanya Kulkarni Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-39-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/nvme/target/io-cmd-bdev.c | 17 +++++++++-------- drivers/nvme/target/zns.c | 6 +++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 27a72504d31c..2dc1c1035626 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -246,7 +246,8 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct scatterlist *sg; struct blk_plug plug; sector_t sector; - int op, i, rc; + blk_opf_t opf; + int i, rc; struct sg_mapping_iter prot_miter; unsigned int iter_flags; unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len; @@ -260,26 +261,26 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } if (req->cmd->rw.opcode == nvme_cmd_write) { - op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; + opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - op |= REQ_FUA; + opf |= REQ_FUA; iter_flags = SG_MITER_TO_SG; } else { - op = REQ_OP_READ; + opf = REQ_OP_READ; iter_flags = SG_MITER_FROM_SG; } if (is_pci_p2pdma_page(sg_page(req->sg))) - op |= REQ_NOMERGE; + opf |= REQ_NOMERGE; sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); if (nvmet_use_inline_bvec(req)) { bio = &req->b.inline_bio; bio_init(bio, req->ns->bdev, req->inline_bvec, - ARRAY_SIZE(req->inline_bvec), op); + ARRAY_SIZE(req->inline_bvec), opf); } else { - bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), op, + bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), opf, GFP_KERNEL); } bio->bi_iter.bi_sector = sector; @@ -306,7 +307,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } bio = bio_alloc(req->ns->bdev, bio_max_segs(sg_cnt), - op, GFP_KERNEL); + opf, GFP_KERNEL); bio->bi_iter.bi_sector = sector; bio_chain(bio, prev); diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index b233c0943fec..c7ef69f29fe4 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -525,7 +525,7 @@ static void nvmet_bdev_zone_append_bio_done(struct bio *bio) void nvmet_bdev_execute_zone_append(struct nvmet_req *req) { sector_t sect = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); - const unsigned int op = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; + const blk_opf_t opf = REQ_OP_ZONE_APPEND | REQ_SYNC | REQ_IDLE; u16 status = NVME_SC_SUCCESS; unsigned int total_len = 0; struct scatterlist *sg; @@ -556,9 +556,9 @@ void nvmet_bdev_execute_zone_append(struct nvmet_req *req) if (nvmet_use_inline_bvec(req)) { bio = &req->z.inline_bio; bio_init(bio, req->ns->bdev, req->inline_bvec, - ARRAY_SIZE(req->inline_bvec), op); + ARRAY_SIZE(req->inline_bvec), opf); } else { - bio = bio_alloc(req->ns->bdev, req->sg_cnt, op, GFP_KERNEL); + bio = bio_alloc(req->ns->bdev, req->sg_cnt, opf, GFP_KERNEL); } bio->bi_end_io = nvmet_bdev_zone_append_bio_done; From ea957547e819a21bd49895c6162f78d542867d39 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:05 -0700 Subject: [PATCH 130/178] scsi/core: Improve static type checking Improve static type checking by using the new blk_opf_t type for the combination of a request operation and its flags. Cc: Martin K. 
Petersen Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-40-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 6 +++--- include/scsi/scsi_cmnd.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 1b3ca5c16c3d..06ec4705caf9 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1125,12 +1125,12 @@ static void scsi_initialize_rq(struct request *rq) cmd->retries = 0; } -struct request *scsi_alloc_request(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags) +struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, + blk_mq_req_flags_t flags) { struct request *rq; - rq = blk_mq_alloc_request(q, op, flags); + rq = blk_mq_alloc_request(q, opf, flags); if (!IS_ERR(rq)) scsi_initialize_rq(rq); return rq; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 1e80e70dfa92..bac55decf900 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -386,7 +386,7 @@ static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd) extern void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq); -struct request *scsi_alloc_request(struct request_queue *q, - unsigned int op, blk_mq_req_flags_t flags); +struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, + blk_mq_req_flags_t flags); #endif /* _SCSI_SCSI_CMND_H */ From 88b32c3cdf5fff7ed5bdaec7493428185cc65b6e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:06 -0700 Subject: [PATCH 131/178] scsi/core: Change the return type of scsi_noretry_cmd() into bool This patch prepares for introducing the new blk_opf_t type in the SCSI core. Since the value returned by scsi_noretry_cmd() is only used in boolean expressions, this patch does not change any functionality. Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-41-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/scsi/scsi_error.c | 16 ++++++++-------- drivers/scsi/scsi_priv.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 266ce414589c..b776cefc7cda 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -1779,7 +1779,7 @@ static void scsi_eh_offline_sdevs(struct list_head *work_q, * scsi_noretry_cmd - determine if command should be failed fast * @scmd: SCSI cmd to examine. 
*/ -int scsi_noretry_cmd(struct scsi_cmnd *scmd) +bool scsi_noretry_cmd(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); @@ -1789,19 +1789,19 @@ int scsi_noretry_cmd(struct scsi_cmnd *scmd) case DID_TIME_OUT: goto check_type; case DID_BUS_BUSY: - return req->cmd_flags & REQ_FAILFAST_TRANSPORT; + return !!(req->cmd_flags & REQ_FAILFAST_TRANSPORT); case DID_PARITY: - return req->cmd_flags & REQ_FAILFAST_DEV; + return !!(req->cmd_flags & REQ_FAILFAST_DEV); case DID_ERROR: if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT) - return 0; + return false; fallthrough; case DID_SOFT_ERROR: - return req->cmd_flags & REQ_FAILFAST_DRIVER; + return !!(req->cmd_flags & REQ_FAILFAST_DRIVER); } if (!scsi_status_is_check_condition(scmd->result)) - return 0; + return false; check_type: /* @@ -1809,9 +1809,9 @@ check_type: * the check condition was retryable. */ if (req->cmd_flags & REQ_FAILFAST_DEV || blk_rq_is_passthrough(req)) - return 1; + return true; - return 0; + return false; } /** diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 6eeaa0a7f86d..429663bd78ec 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -82,7 +82,7 @@ void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *done_q); int scsi_eh_get_sense(struct list_head *work_q, struct list_head *done_q); -int scsi_noretry_cmd(struct scsi_cmnd *scmd); +bool scsi_noretry_cmd(struct scsi_cmnd *scmd); void scsi_eh_done(struct scsi_cmnd *scmd); /* scsi_lib.c */ From 2599cac57a9af4e7ce628e2ef41e92797cba4ae2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:07 -0700 Subject: [PATCH 132/178] scsi/core: Use the new blk_opf_t type Use the new blk_opf_t type for arguments and variables that represent request flags. Use the !! operator in scsi_noretry_cmd() to convert the blk_opf_t type into a boolean. This patch does not change any functionality. Cc: Martin K. 
Petersen Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: John Garry Cc: Mike Christie Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-42-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 6 +++--- include/scsi/scsi_device.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 06ec4705caf9..17a617db9ae0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -209,8 +209,8 @@ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, u64 flags, req_flags_t rq_flags, - int *resid) + int timeout, int retries, blk_opf_t flags, + req_flags_t rq_flags, int *resid) { struct request *req; struct scsi_cmnd *scmd; @@ -633,7 +633,7 @@ static blk_status_t scsi_result_to_blk_status(struct scsi_cmnd *cmd, int result) */ static unsigned int scsi_rq_err_bytes(const struct request *rq) { - unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; + blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK; unsigned int bytes = 0; struct bio *bio; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 7cf5f3b7589f..2493bd65351a 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -457,7 +457,7 @@ extern void scsi_sanitize_inquiry_string(unsigned char *s, int len); extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, struct scsi_sense_hdr *sshdr, - int timeout, int retries, u64 flags, + int timeout, int retries, blk_opf_t flags, req_flags_t rq_flags, int *resid); /* Make sure any sense buffer is the correct size. */ #define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense, \ From c15cbe9a84b05462195102bcead0213eb068c595 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:08 -0700 Subject: [PATCH 133/178] scsi/device_handlers: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent request flags. Cc: Hannes Reinecke Cc: Martin Wilck Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-43-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/scsi/device_handler/scsi_dh_alua.c | 4 ++-- drivers/scsi/device_handler/scsi_dh_emc.c | 2 +- drivers/scsi/device_handler/scsi_dh_hp_sw.c | 4 ++-- drivers/scsi/device_handler/scsi_dh_rdac.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 1d9be771f3ee..610a51538f03 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -127,7 +127,7 @@ static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, int bufflen, struct scsi_sense_hdr *sshdr, int flags) { u8 cdb[MAX_COMMAND_SIZE]; - int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; /* Prepare the command. 
*/ @@ -157,7 +157,7 @@ static int submit_stpg(struct scsi_device *sdev, int group_id, u8 cdb[MAX_COMMAND_SIZE]; unsigned char stpg_data[8]; int stpg_len = 8; - int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; /* Prepare the data buffer */ diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c index bd28ec6cfb72..2e21ab447873 100644 --- a/drivers/scsi/device_handler/scsi_dh_emc.c +++ b/drivers/scsi/device_handler/scsi_dh_emc.c @@ -239,7 +239,7 @@ static int send_trespass_cmd(struct scsi_device *sdev, unsigned char cdb[MAX_COMMAND_SIZE]; int err, res = SCSI_DH_OK, len; struct scsi_sense_hdr sshdr; - u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; if (csdev->flags & CLARIION_SHORT_TRESPASS) { diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c index 4a3f7831a2d6..0d2cfa60aa06 100644 --- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c +++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c @@ -83,7 +83,7 @@ static int hp_sw_tur(struct scsi_device *sdev, struct hp_sw_dh_data *h) unsigned char cmd[6] = { TEST_UNIT_READY }; struct scsi_sense_hdr sshdr; int ret = SCSI_DH_OK, res; - u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; retry: @@ -121,7 +121,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h) struct scsi_device *sdev = h->sdev; int res, rc = SCSI_DH_OK; int retry_cnt = HP_SW_RETRIES; - u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; retry: diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 66652ab409cc..bf8754741f85 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -536,7 +536,7 @@ static void send_mode_select(struct work_struct *work) unsigned char cdb[MAX_COMMAND_SIZE]; struct scsi_sense_hdr sshdr; unsigned int data_size; - u64 req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | + blk_opf_t req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER; spin_lock(&ctlr->ms_lock); From 0d8009f39d0adb5b0415190f71841a88f14d9790 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:09 -0700 Subject: [PATCH 134/178] scsi/ufs: Rename a 'dir' argument into 'op' Improve consistency of the kernel code by renaming a request operation argument from 'dir' into 'op'. Reviewed-by: Avri Altman Cc: Martin K. 
Petersen Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-44-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/ufs/core/ufshpb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c index 24f1ee82c215..a1a7a1175a5a 100644 --- a/drivers/ufs/core/ufshpb.c +++ b/drivers/ufs/core/ufshpb.c @@ -434,7 +434,7 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) } static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb, int rgn_idx, - enum req_op dir, bool atomic) + enum req_op op, bool atomic) { struct ufshpb_req *rq; struct request *req; @@ -445,7 +445,7 @@ static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb, int rgn_idx, return NULL; retry: - req = blk_mq_alloc_request(hpb->sdev_ufs_lu->request_queue, dir, + req = blk_mq_alloc_request(hpb->sdev_ufs_lu->request_queue, op, BLK_MQ_REQ_NOWAIT); if (!atomic && (PTR_ERR(req) == -EWOULDBLOCK) && (--retries > 0)) { From 79fe9d7d9f6479d3fe85d39813ec452844fac99a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:10 -0700 Subject: [PATCH 135/178] scsi/target: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent a request operation combined with request flags. Cc: Mike Christie Cc: Bodo Stroesser Cc: Mingzhe Zou Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-45-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/target/target_core_iblock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 378c80313a0f..5fef19af88df 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -343,7 +343,7 @@ static void iblock_bio_done(struct bio *bio) } static struct bio *iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, - unsigned int opf) + blk_opf_t opf) { struct iblock_dev *ib_dev = IBLOCK_DEV(cmd->se_dev); struct bio *bio; @@ -719,7 +719,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, struct bio_list list; struct scatterlist *sg; u32 sg_num = sgl_nents; - unsigned int opf; + blk_opf_t opf; unsigned bio_cnt; int i, rc; struct sg_mapping_iter prot_miter; From f8e6e4bd9fd8c452565f3eaeb358e3cc08d880f4 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:11 -0700 Subject: [PATCH 136/178] mm: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. 
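As a sketch of how callers are expected to consume the new return type (the wrapper below is illustrative, not code from this patch): a writeback path ORs the operation bits into the flags returned by wbc_to_write_flags():

	#include <linux/bio.h>
	#include <linux/writeback.h>

	/* Sketch: combine wbc-derived flags with the write operation. */
	static struct bio *example_writeback_bio(struct block_device *bdev,
						 struct writeback_control *wbc)
	{
		blk_opf_t write_flags = wbc_to_write_flags(wbc);

		/* Operation bits come from the caller, flag bits from the wbc. */
		return bio_alloc(bdev, 1, REQ_OP_WRITE | write_flags, GFP_NOFS);
	}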
Cc: Andrew Morton Cc: Christoph Hellwig Cc: Jan Kara Cc: Stefan Roesch Cc: NeilBrown Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-46-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index da21d63f70e2..e91bea371b18 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -101,9 +101,9 @@ struct writeback_control { #endif }; -static inline int wbc_to_write_flags(struct writeback_control *wbc) +static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { - int flags = 0; + blk_opf_t flags = 0; if (wbc->punt_to_cgroup) flags = REQ_CGROUP_PUNT; From 3ae7286943ae6f6bfecfe0a3da9d1a4c64f5531f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:12 -0700 Subject: [PATCH 137/178] fs/buffer: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Change WRITE into REQ_OP_WRITE. This patch does not change any functionality since REQ_OP_WRITE == WRITE == 1. Reviewed-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-47-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/buffer.c | 21 +++++++++++---------- include/linux/buffer_head.h | 9 +++++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index 898c7f301b1b..4a00b61f35ec 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -52,8 +52,8 @@ #include "internal.h" static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); -static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - struct writeback_control *wbc); +static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, + struct buffer_head *bh, struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -1716,7 +1716,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, struct buffer_head *bh, *head; unsigned int blocksize, bbits; int nr_underway = 0; - int write_flags = wbc_to_write_flags(wbc); + blk_opf_t write_flags = wbc_to_write_flags(wbc); head = create_page_buffers(page, inode, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -2994,8 +2994,8 @@ static void end_bio_bh_io_sync(struct bio *bio) bio_put(bio); } -static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, - struct writeback_control *wbc) +static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, + struct buffer_head *bh, struct writeback_control *wbc) { struct bio *bio; @@ -3040,7 +3040,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, return 0; } -int submit_bh(int op, int op_flags, struct buffer_head *bh) +int submit_bh(enum req_op op, blk_opf_t op_flags, struct buffer_head *bh) { return submit_bh_wbc(op, op_flags, bh, NULL); } @@ -3072,7 +3072,8 @@ EXPORT_SYMBOL(submit_bh); * All of the buffers must be for the same device, and must also be a * multiple of the current approved size for the device. 
*/ -void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[]) +void ll_rw_block(enum req_op op, blk_opf_t op_flags, int nr, + struct buffer_head *bhs[]) { int i; @@ -3081,7 +3082,7 @@ void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[]) if (!trylock_buffer(bh)) continue; - if (op == WRITE) { + if (op == REQ_OP_WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); @@ -3101,7 +3102,7 @@ void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); -void write_dirty_buffer(struct buffer_head *bh, int op_flags) +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { lock_buffer(bh); if (!test_clear_buffer_dirty(bh)) { @@ -3119,7 +3120,7 @@ EXPORT_SYMBOL(write_dirty_buffer); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { int ret = 0; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..9795df9400bd 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -9,6 +9,7 @@ #define _LINUX_BUFFER_HEAD_H #include +#include #include #include #include @@ -201,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(int, int, int, struct buffer_head * bh[]); +void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); -void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int submit_bh(int, int, struct buffer_head *); +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); From 1420c4a549bf28ffddbed827d61fb3d4d2132ddb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:13 -0700 Subject: [PATCH 138/178] fs/buffer: Combine two submit_bh() and ll_rw_block() arguments Both submit_bh() and ll_rw_block() accept a request operation type and request flags as their first two arguments. Micro-optimize these two functions by combining these first two arguments into a single argument. This patch does not change the behavior of any of the modified code. 
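The resulting change at call sites is mechanical; for example (taken from the conversions below):

	/* before: operation and flags as two separate arguments */
	submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);

	/* after: one blk_opf_t argument carrying both */
	submit_bh(REQ_OP_WRITE | REQ_SYNC, bh);

Inside submit_bh_wbc() and ll_rw_block() the operation is recovered with 'opf & REQ_OP_MASK', so no information is lost by combining the two arguments.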
Cc: Alexander Viro Cc: Jan Kara Acked-by: Song Liu (for the md changes) Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-48-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/md-bitmap.c | 4 +-- fs/buffer.c | 53 +++++++++++++++++++------------------ fs/ext4/fast_commit.c | 2 +- fs/ext4/mmp.c | 2 +- fs/ext4/super.c | 6 ++--- fs/gfs2/bmap.c | 5 ++-- fs/gfs2/dir.c | 5 ++-- fs/gfs2/meta_io.c | 9 +++---- fs/gfs2/quota.c | 2 +- fs/isofs/compress.c | 2 +- fs/jbd2/commit.c | 8 +++--- fs/jbd2/journal.c | 4 +-- fs/jbd2/recovery.c | 4 +-- fs/nilfs2/btnode.c | 2 +- fs/nilfs2/gcinode.c | 2 +- fs/nilfs2/mdt.c | 2 +- fs/ntfs/aops.c | 6 ++--- fs/ntfs/compress.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/logfile.c | 2 +- fs/ntfs/mft.c | 4 +-- fs/ntfs3/file.c | 2 +- fs/ntfs3/inode.c | 2 +- fs/ocfs2/aops.c | 2 +- fs/ocfs2/buffer_head_io.c | 8 +++--- fs/ocfs2/super.c | 2 +- fs/reiserfs/inode.c | 4 +-- fs/reiserfs/journal.c | 12 ++++----- fs/reiserfs/stree.c | 4 +-- fs/reiserfs/super.c | 2 +- fs/udf/dir.c | 2 +- fs/udf/directory.c | 2 +- fs/udf/inode.c | 2 +- fs/ufs/balloc.c | 2 +- include/linux/buffer_head.h | 4 +-- 35 files changed, 88 insertions(+), 90 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 0a21b8317103..bf6dffadbe6f 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -302,7 +302,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); bh = bh->b_this_page; } @@ -394,7 +394,7 @@ static int read_page(struct file *file, unsigned long index, atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } blk_cur++; bh = bh->b_this_page; diff --git a/fs/buffer.c b/fs/buffer.c index 4a00b61f35ec..af53569930bb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -52,8 +52,8 @@ #include "internal.h" static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); -static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, - struct buffer_head *bh, struct writeback_control *wbc); +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, + struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev, struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 1, &bh); put_bh(bh); } } @@ -1174,7 +1174,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) } else { get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -1342,7 +1342,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh); brelse(bh); } } @@ -1353,7 +1353,7 @@ void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, { struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh); brelse(bh); } } 
@@ -1804,7 +1804,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); nr_underway++; } bh = next; @@ -1858,7 +1858,7 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); nr_underway++; } bh = next; @@ -2033,7 +2033,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); *wait_bh++=bh; } } @@ -2334,7 +2334,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (buffer_uptodate(bh)) end_buffer_async_read(bh, 1); else - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } return 0; } @@ -2665,7 +2665,7 @@ int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len, if (block_start < from || block_end > to) { lock_buffer(bh); bh->b_end_io = end_buffer_read_nobh; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); nr_reads++; } } @@ -2915,7 +2915,7 @@ int block_truncate_page(struct address_space *mapping, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = -EIO; - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) @@ -2994,9 +2994,10 @@ static void end_bio_bh_io_sync(struct bio *bio) bio_put(bio); } -static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, - struct buffer_head *bh, struct writeback_control *wbc) +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, + struct writeback_control *wbc) { + const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; BUG_ON(!buffer_locked(bh)); @@ -3012,11 +3013,11 @@ static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, clear_buffer_write_io_error(bh); if (buffer_meta(bh)) - op_flags |= REQ_META; + opf |= REQ_META; if (buffer_prio(bh)) - op_flags |= REQ_PRIO; + opf |= REQ_PRIO; - bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO); + bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); @@ -3040,9 +3041,9 @@ static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, return 0; } -int submit_bh(enum req_op op, blk_opf_t op_flags, struct buffer_head *bh) +int submit_bh(blk_opf_t opf, struct buffer_head *bh) { - return submit_bh_wbc(op, op_flags, bh, NULL); + return submit_bh_wbc(opf, bh, NULL); } EXPORT_SYMBOL(submit_bh); @@ -3072,9 +3073,9 @@ EXPORT_SYMBOL(submit_bh); * All of the buffers must be for the same device, and must also be a * multiple of the current approved size for the device. 
*/ -void ll_rw_block(enum req_op op, blk_opf_t op_flags, int nr, - struct buffer_head *bhs[]) +void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[]) { + const enum req_op op = opf & REQ_OP_MASK; int i; for (i = 0; i < nr; i++) { @@ -3086,14 +3087,14 @@ void ll_rw_block(enum req_op op, blk_opf_t op_flags, int nr, if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(op, op_flags, bh); + submit_bh(opf, bh); continue; } } else { if (!buffer_uptodate(bh)) { bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(op, op_flags, bh); + submit_bh(opf, bh); continue; } } @@ -3111,7 +3112,7 @@ void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) } bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, op_flags, bh); + submit_bh(REQ_OP_WRITE | op_flags, bh); } EXPORT_SYMBOL(write_dirty_buffer); @@ -3138,7 +3139,7 @@ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(REQ_OP_WRITE, op_flags, bh); + ret = submit_bh(REQ_OP_WRITE | op_flags, bh); wait_on_buffer(bh); if (!ret && !buffer_uptodate(bh)) ret = -EIO; @@ -3366,7 +3367,7 @@ int bh_submit_read(struct buffer_head *bh) get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return 0; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 795a60ad1897..0df5482c6c1c 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -668,7 +668,7 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) set_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = ext4_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, write_flags, bh); + submit_bh(REQ_OP_WRITE | write_flags, bh); EXT4_SB(sb)->s_fc_bh = NULL; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index b221f313ded6..9af68a7ecdcf 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 845f2f8aee5f..24922184b622 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -171,7 +171,7 @@ static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, bh->b_end_io = end_io ? end_io : end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, op_flags, bh); + submit_bh(REQ_OP_READ | op_flags, bh); } void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, @@ -5939,8 +5939,8 @@ static int ext4_commit_super(struct super_block *sb) /* Clear potential dirty bit if it was journalled update */ clear_buffer_dirty(sbh); sbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh); + submit_bh(REQ_OP_WRITE | REQ_SYNC | + (test_opt(sb, BARRIER) ? 
REQ_FUA : 0), sbh); wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) { ext4_msg(sb, KERN_ERR, "I/O error while writing " diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index b6697333bb2b..3bdb2c668a71 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -310,9 +310,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - rabh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, rabh); continue; } unlock_buffer(rabh); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 42b7dfffb5e7..a0562dd1bada 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1508,9 +1508,8 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - bh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, bh); continue; } brelse(bh); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 868dcc71b581..3570739f005d 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -75,7 +75,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, write_flags, bh); + submit_bh(REQ_OP_WRITE | write_flags, bh); nr_underway++; } bh = next; @@ -527,7 +527,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh); + ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; @@ -536,9 +536,8 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 59d727a4ae2c..c98a7faa67d3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -746,7 +746,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, if (PageUptodate(page)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 95a19f25d61c..b466172eec25 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -82,7 +82,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, return 0; } haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); - ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); + ll_rw_block(REQ_OP_READ, haveblocks, bhs); curbh = 0; curpage = 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index eb315e81f1a6..890b5543a1c5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,10 +155,10 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); + ret = submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | + REQ_FUA, bh); else - ret = 
submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); *cbh = bh; return ret; @@ -763,7 +763,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); } cond_resched(); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9015f5fa2862..07e6aaf7e213 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1638,7 +1638,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) sb->s_checksum = jbd2_superblock_csum(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(REQ_OP_WRITE, write_flags, bh); + ret = submit_bh(REQ_OP_WRITE | write_flags, bh); wait_on_buffer(bh); if (buffer_write_io_error(bh)) { clear_buffer_write_io_error(bh); @@ -1900,7 +1900,7 @@ static int journal_get_superblock(journal_t *journal) J_ASSERT(bh != NULL); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { printk(KERN_ERR diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 8ca3527189f8..e699d6ab2c0e 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -100,7 +100,7 @@ static int do_readahead(journal_t *journal, unsigned int start) if (!buffer_uptodate(bh) && !buffer_locked(bh)) { bufs[nbufs++] = bh; if (nbufs == MAXBUF) { - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + ll_rw_block(REQ_OP_READ, nbufs, bufs); journal_brelse_array(bufs, nbufs); nbufs = 0; } @@ -109,7 +109,7 @@ static int do_readahead(journal_t *journal, unsigned int start) } if (nbufs) - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + ll_rw_block(REQ_OP_READ, nbufs, bufs); err = 0; failed: diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index ca611ac09f7c..5c39efbf733f 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -122,7 +122,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, mode_flags, bh); + submit_bh(mode | mode_flags, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 04fdd420eae7..847def8af315 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -92,7 +92,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); if (vbn) bh->b_blocknr = vbn; out: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index d29a0f2b9c16..66e8811c2528 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -148,7 +148,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, mode_flags, bh); + submit_bh(mode | mode_flags, bh); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9e3964ea2ea0..b5765fdb3a47 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -342,7 +342,7 @@ handle_zblock: for (i = 0; i < nr; i++) { tbh = arr[i]; if (likely(!buffer_uptodate(tbh))) - submit_bh(REQ_OP_READ, 0, tbh); + submit_bh(REQ_OP_READ, tbh); else ntfs_end_buffer_async_read(tbh, 1); } @@ -859,7 +859,7 @@ lock_retry_remap: do { struct buffer_head *next = bh->b_this_page; if 
(buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); need_end_writeback = false; } bh = next; @@ -1187,7 +1187,7 @@ lock_retry_remap: BUG_ON(!buffer_mapped(tbh)); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Synchronize the mft mirror now if not @sync. */ if (is_mft && !sync) diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index a60f543e7557..587e9b187873 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -658,7 +658,7 @@ lock_retry_remap: } get_bh(tbh); tbh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, tbh); + submit_bh(REQ_OP_READ, tbh); } /* Wait for io completion on all buffer heads. */ diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index a8abe2296514..46ed69b86c33 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -537,7 +537,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) lock_buffer(bh); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - return submit_bh(REQ_OP_READ, 0, bh); + return submit_bh(REQ_OP_READ, bh); } /** diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index bc1bf217b38e..6ce60ffc6ac0 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -807,7 +807,7 @@ map_vcn: * completed ignore errors afterwards as we can assume * that if one buffer worked all of them will work. */ - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); if (should_wait) { should_wait = false; wait_on_buffer(bh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 0d62cd5bb7f8..f7bf5ce960cc 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -583,7 +583,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Wait on i/o completion of buffers. */ for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { @@ -780,7 +780,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Synchronize the mft mirror now if not @sync. 
*/ if (!sync && ni->mft_no < vol->mftmirr_size) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 8e9d2b35175f..4a21745711fe 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -242,7 +242,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) lock_buffer(bh); bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index be4ebdd8048b..d100a063def2 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -629,7 +629,7 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bh->b_size = block_size; off = vbo & (PAGE_SIZE - 1); set_bh_page(bh, page, off); - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { err = -EIO; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 35d40a67204c..304ed2be1b83 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -638,7 +638,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, !buffer_new(bh) && ocfs2_should_read_blk(inode, page, block_start) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); *wait_bh++=bh; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index e7758778abef..196638a22b48 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -64,7 +64,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); wait_on_buffer(bh); @@ -147,7 +147,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } read_failure: @@ -328,7 +328,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, if (validate) set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); continue; } } @@ -449,7 +449,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); wait_on_buffer(bh); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f7298816d8d9..e68807196076 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1785,7 +1785,7 @@ static int ocfs2_get_sector(struct super_block *sb, if (!buffer_dirty(*bh)) clear_buffer_uptodate(*bh); unlock_buffer(*bh); - ll_rw_block(REQ_OP_READ, 0, 1, bh); + ll_rw_block(REQ_OP_READ, 1, bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { mlog_errno(-EIO); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0cffe054b78e..23f542d1748b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2664,7 +2664,7 @@ static int reiserfs_write_full_page(struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); nr++; } put_bh(bh); @@ -2724,7 +2724,7 @@ fail: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); nr++; } put_bh(bh); diff 
--git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index d8cc9a366124..94addfcefede 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -650,7 +650,7 @@ static void submit_logged_buffer(struct buffer_head *bh) BUG(); if (!buffer_uptodate(bh)) BUG(); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); } static void submit_ordered_buffer(struct buffer_head *bh) @@ -660,7 +660,7 @@ static void submit_ordered_buffer(struct buffer_head *bh) clear_buffer_dirty(bh); if (!buffer_uptodate(bh)) BUG(); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); } #define CHUNK_SIZE 32 @@ -868,7 +868,7 @@ loop_next: */ if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { spin_unlock(lock); - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 1, &bh); spin_lock(lock); } put_bh(bh); @@ -1054,7 +1054,7 @@ static int flush_commit_list(struct super_block *s, if (tbh) { if (buffer_dirty(tbh)) { depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh); + ll_rw_block(REQ_OP_WRITE, 1, &tbh); reiserfs_write_lock_nested(s, depth); } put_bh(tbh) ; @@ -2240,7 +2240,7 @@ abort_replay: } } /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks); + ll_rw_block(REQ_OP_READ, get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(log_blocks[i]); @@ -2342,7 +2342,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, } else bhlist[j++] = bh; } - ll_rw_block(REQ_OP_READ, 0, j, bhlist); + ll_rw_block(REQ_OP_READ, j, bhlist); for (i = 1; i < j; i++) brelse(bhlist[i]); bh = bhlist[0]; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ef42729216d1..9a293609a022 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -579,7 +579,7 @@ static int search_by_key_reada(struct super_block *s, if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, bh + j); } brelse(bh[j]); } @@ -685,7 +685,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, if (!buffer_uptodate(bh) && depth == -1) depth = reiserfs_write_unlock_nested(sb); - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (depth != -1) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index cfb7c44c7366..c88cd2ce0665 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1702,7 +1702,7 @@ static int read_super_block(struct super_block *s, int offset) /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - ll_rw_block(REQ_OP_READ, 0, 1, &SB_BUFFER_WITH_SB(s)); + ll_rw_block(REQ_OP_READ, 1, &SB_BUFFER_WITH_SB(s)); wait_on_buffer(SB_BUFFER_WITH_SB(s)); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 42e3e551fa4c..cad3772f9dbe 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -130,7 +130,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 73720320f0ab..a2adf6293093 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c 
@@ -89,7 +89,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index edc88716751a..8d06daed549f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1214,7 +1214,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, if (buffer_uptodate(bh)) return bh; - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 075d3d9114c8..bd810d8239f2 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -296,7 +296,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, oldb + pos); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ufs_error(inode->i_sb, __func__, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9795df9400bd..bb68eb6407da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -202,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); +void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); +int submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); From c6293eacfc16fe3d85f468fc7ed91eb18f5861d3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:14 -0700 Subject: [PATCH 139/178] fs/direct-io: Reduce the size of struct dio Reduce the size of struct dio by combining the 'op' and 'op_flags' into the new 'opf' member. Use the new blk_opf_t type to improve static type checking. This patch does not change any functionality. Reviewed-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Cc: Darrick J. 
Wong Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-49-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/direct-io.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 840752006f60..94b71440c332 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -117,8 +117,7 @@ struct dio_submit { /* dio_state communicated between submission path and end_io */ struct dio { int flags; /* doesn't change */ - int op; - int op_flags; + blk_opf_t opf; /* request operation type and flags */ struct gendisk *bio_disk; struct inode *inode; loff_t i_size; /* i_size when submitted */ @@ -167,12 +166,13 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) */ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; ssize_t ret; ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES, &sdio->from); - if (ret < 0 && sdio->blocks_available && (dio->op == REQ_OP_WRITE)) { + if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) { struct page *page = ZERO_PAGE(0); /* * A memory fault, but the filesystem has some outstanding @@ -234,6 +234,7 @@ static inline struct page *dio_get_page(struct dio *dio, */ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; int err; @@ -251,7 +252,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) transferred = dio->result; /* Check for short read case */ - if ((dio->op == REQ_OP_READ) && + if (dio_op == REQ_OP_READ && ((offset + transferred) > dio->i_size)) transferred = dio->i_size - offset; /* ignore EFAULT if some IO has been done */ @@ -286,7 +287,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) * zeros from unwritten extents. */ if (flags & DIO_COMPLETE_INVALIDATE && - ret > 0 && dio->op == REQ_OP_WRITE && + ret > 0 && dio_op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -305,7 +306,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) */ dio->iocb->ki_pos += transferred; - if (ret > 0 && dio->op == REQ_OP_WRITE) + if (ret > 0 && dio_op == REQ_OP_WRITE) ret = generic_write_sync(dio->iocb, ret); dio->iocb->ki_complete(dio->iocb, ret); } @@ -329,6 +330,7 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); static void dio_bio_end_aio(struct bio *bio) { struct dio *dio = bio->bi_private; + const enum req_op dio_op = dio->opf & REQ_OP_MASK; unsigned long remaining; unsigned long flags; bool defer_completion = false; @@ -353,7 +355,7 @@ static void dio_bio_end_aio(struct bio *bio) */ if (dio->result) defer_completion = dio->defer_completion || - (dio->op == REQ_OP_WRITE && + (dio_op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages); if (defer_completion) { INIT_WORK(&dio->complete_work, dio_aio_complete_work); @@ -396,7 +398,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, * bio_alloc() is guaranteed to return a bio when allowed to sleep and * we request a valid number of vectors. 
*/ - bio = bio_alloc(bdev, nr_vecs, dio->op | dio->op_flags, GFP_KERNEL); + bio = bio_alloc(bdev, nr_vecs, dio->opf, GFP_KERNEL); bio->bi_iter.bi_sector = first_sector; if (dio->is_async) bio->bi_end_io = dio_bio_end_aio; @@ -415,6 +417,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, */ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; struct bio *bio = sdio->bio; unsigned long flags; @@ -426,7 +429,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) dio->refcount++; spin_unlock_irqrestore(&dio->bio_lock, flags); - if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) + if (dio->is_async && dio_op == REQ_OP_READ && dio->should_dirty) bio_set_pages_dirty(bio); dio->bio_disk = bio->bi_bdev->bd_disk; @@ -492,7 +495,8 @@ static struct bio *dio_await_one(struct dio *dio) static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio) { blk_status_t err = bio->bi_status; - bool should_dirty = dio->op == REQ_OP_READ && dio->should_dirty; + const enum req_op dio_op = dio->opf & REQ_OP_MASK; + bool should_dirty = dio_op == REQ_OP_READ && dio->should_dirty; if (err) { if (err == BLK_STS_AGAIN && (bio->bi_opf & REQ_NOWAIT)) @@ -619,6 +623,7 @@ static int dio_set_defer_completion(struct dio *dio) static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; int ret; sector_t fs_startblk; /* Into file, in filesystem-sized blocks */ sector_t fs_endblk; /* Into file, in filesystem-sized blocks */ @@ -653,7 +658,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio, * which may decide to handle it or also return an unmapped * buffer head. 
*/ - create = dio->op == REQ_OP_WRITE; + create = dio_op == REQ_OP_WRITE; if (dio->flags & DIO_SKIP_HOLES) { i_size = i_size_read(dio->inode); if (i_size && fs_startblk <= (i_size - 1) >> i_blkbits) @@ -801,10 +806,11 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, unsigned offset, unsigned len, sector_t blocknr, struct buffer_head *map_bh) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; int ret = 0; int boundary = sdio->boundary; /* dio_send_cur_page may clear it */ - if (dio->op == REQ_OP_WRITE) { + if (dio_op == REQ_OP_WRITE) { /* * Read accounting is performed in submit_bio() */ @@ -917,6 +923,7 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio, static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { + const enum req_op dio_op = dio->opf & REQ_OP_MASK; const unsigned blkbits = sdio->blkbits; const unsigned i_blkbits = blkbits + sdio->blkfactor; int ret = 0; @@ -992,7 +999,7 @@ do_holes: loff_t i_size_aligned; /* AKPM: eargh, -ENOTBLK is a hack */ - if (dio->op == REQ_OP_WRITE) { + if (dio_op == REQ_OP_WRITE) { put_page(page); return -ENOTBLK; } @@ -1196,12 +1203,11 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio->inode = inode; if (iov_iter_rw(iter) == WRITE) { - dio->op = REQ_OP_WRITE; - dio->op_flags = REQ_SYNC | REQ_IDLE; + dio->opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (iocb->ki_flags & IOCB_NOWAIT) - dio->op_flags |= REQ_NOWAIT; + dio->opf |= REQ_NOWAIT; } else { - dio->op = REQ_OP_READ; + dio->opf = REQ_OP_READ; } /* From f84c94afcf823c6c78438c56c9414763beec50d9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:15 -0700 Subject: [PATCH 140/178] fs/mpage: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for the combination of a block layer request with block layer request flags. Cc: Al Viro Cc: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-50-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/mpage.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/mpage.c b/fs/mpage.c index 0d25f44f5707..c6d8bf8c22a5 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -145,13 +145,13 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) struct block_device *bdev = NULL; int length; int fully_mapped = 1; - int op = REQ_OP_READ; + blk_opf_t opf = REQ_OP_READ; unsigned nblocks; unsigned relative_block; gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); if (args->is_readahead) { - op |= REQ_RAHEAD; + opf |= REQ_RAHEAD; gfp |= __GFP_NORETRY | __GFP_NOWARN; } @@ -269,7 +269,7 @@ alloc_new: page)) goto out; } - args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), op, + args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf, gfp); if (args->bio == NULL) goto confused; From bf9486d6dd2351f6cfff9a8df87657a1248a918d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:16 -0700 Subject: [PATCH 141/178] fs/btrfs: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. 
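For clarity, the two types relate through REQ_OP_MASK: the operation occupies the low bits of a blk_opf_t value and the flags the remaining bits, which is how the conversions in this series recover an enum req_op from a combined value. A minimal sketch (helper name illustrative):

	#include <linux/blk_types.h>

	/* Sketch: recover the operation and the flag bits from one value. */
	static void example_split(blk_opf_t opf)
	{
		const enum req_op op = opf & REQ_OP_MASK;	/* operation bits */
		const blk_opf_t flags = opf & ~REQ_OP_MASK;	/* flag bits */

		(void)op;
		(void)flags;
	}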
Acked-by: David Sterba Cc: Josef Bacik Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-51-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/btrfs/check-integrity.c | 4 ++-- fs/btrfs/compression.c | 6 +++--- fs/btrfs/compression.h | 2 +- fs/btrfs/extent_io.c | 18 +++++++++--------- fs/btrfs/inode.c | 4 ++-- fs/btrfs/raid56.c | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 5d20137b7b67..98c6e5feab19 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -152,7 +152,7 @@ struct btrfsic_block { struct btrfsic_block *next_in_same_bio; void *orig_bio_private; bio_end_io_t *orig_bio_end_io; - int submit_bio_bh_rw; + blk_opf_t submit_bio_bh_rw; u64 flush_gen; /* only valid if !never_written */ }; @@ -1681,7 +1681,7 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, u64 dev_bytenr, char **mapped_datav, unsigned int num_pages, struct bio *bio, int *bio_is_patched, - int submit_bio_bh_rw) + blk_opf_t submit_bio_bh_rw) { int is_metadata; struct btrfsic_block *block; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index f4564f32f6d9..a82b9f17f476 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -455,7 +455,7 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info, static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr, - unsigned int opf, bio_end_io_t endio_func, + blk_opf_t opf, bio_end_io_t endio_func, u64 *next_stripe_start) { struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb); @@ -505,7 +505,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, unsigned int compressed_len, struct page **compressed_pages, unsigned int nr_pages, - unsigned int write_flags, + blk_opf_t write_flags, struct cgroup_subsys_state *blkcg_css, bool writeback) { @@ -517,7 +517,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, blk_status_t ret; int skip_sum = inode->flags & BTRFS_INODE_NODATASUM; const bool use_append = btrfs_use_zone_append(inode, disk_start); - const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE; + const enum req_op bio_op = use_append ? 
REQ_OP_ZONE_APPEND : REQ_OP_WRITE; ASSERT(IS_ALIGNED(start, fs_info->sectorsize) && IS_ALIGNED(len, fs_info->sectorsize)); diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index 2707404389a5..2b56d63e01ce 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h @@ -99,7 +99,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start, unsigned int compressed_len, struct page **compressed_pages, unsigned int nr_pages, - unsigned int write_flags, + blk_opf_t write_flags, struct cgroup_subsys_state *blkcg_css, bool writeback); void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 04e36343da3a..60a20df353e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3357,7 +3357,7 @@ static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl, static int alloc_new_bio(struct btrfs_inode *inode, struct btrfs_bio_ctrl *bio_ctrl, struct writeback_control *wbc, - unsigned int opf, + blk_opf_t opf, bio_end_io_t end_io_func, u64 disk_bytenr, u32 offset, u64 file_offset, enum btrfs_compression_type compress_type) @@ -3437,7 +3437,7 @@ error: * @prev_bio_flags: flags of previous bio to see if we can merge the current one * @compress_type: compress type for current bio */ -static int submit_extent_page(unsigned int opf, +static int submit_extent_page(blk_opf_t opf, struct writeback_control *wbc, struct btrfs_bio_ctrl *bio_ctrl, struct page *page, u64 disk_bytenr, @@ -3615,7 +3615,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, */ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, struct btrfs_bio_ctrl *bio_ctrl, - unsigned int read_flags, u64 *prev_em_start) + blk_opf_t read_flags, u64 *prev_em_start) { struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -3983,8 +3983,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, int saved_ret = 0; int ret = 0; int nr = 0; - u32 opf = REQ_OP_WRITE; - const unsigned int write_flags = wbc_to_write_flags(wbc); + enum req_op op = REQ_OP_WRITE; + const blk_opf_t write_flags = wbc_to_write_flags(wbc); bool has_error = false; bool compressed; @@ -4058,7 +4058,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, iosize = min(min(em_end, end + 1), dirty_range_end) - cur; if (btrfs_use_zone_append(inode, em->block_start)) - opf = REQ_OP_ZONE_APPEND; + op = REQ_OP_ZONE_APPEND; free_extent_map(em); em = NULL; @@ -4094,7 +4094,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, */ btrfs_page_clear_dirty(fs_info, page, cur, iosize); - ret = submit_extent_page(opf | write_flags, wbc, + ret = submit_extent_page(op | write_flags, wbc, &epd->bio_ctrl, page, disk_bytenr, iosize, cur - page_offset(page), @@ -4575,7 +4575,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb, { struct btrfs_fs_info *fs_info = eb->fs_info; struct page *page = eb->pages[0]; - unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; + blk_opf_t write_flags = wbc_to_write_flags(wbc) | REQ_META; bool no_dirty_ebs = false; int ret; @@ -4620,7 +4620,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, { u64 disk_bytenr = eb->start; int i, num_pages; - unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; + blk_opf_t write_flags = wbc_to_write_flags(wbc) | REQ_META; int ret = 0; prepare_eb_write(eb); diff --git a/fs/btrfs/inode.c 
b/fs/btrfs/inode.c index 05e0c4a5affd..f8378c949be4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -485,7 +485,7 @@ struct async_chunk { struct page *locked_page; u64 start; u64 end; - unsigned int write_flags; + blk_opf_t write_flags; struct list_head extents; struct cgroup_subsys_state *blkcg_css; struct btrfs_work work; @@ -1435,7 +1435,7 @@ static int cow_file_range_async(struct btrfs_inode *inode, int i; bool should_compress; unsigned nofs_flag; - const unsigned int write_flags = wbc_to_write_flags(wbc); + const blk_opf_t write_flags = wbc_to_write_flags(wbc); unlock_extent(&inode->io_tree, start, end); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a5b623ee6fac..c520412d1f86 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1136,7 +1136,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, unsigned int stripe_nr, unsigned int sector_nr, unsigned long bio_max_len, - unsigned int opf) + enum req_op op) { const u32 sectorsize = rbio->bioc->fs_info->sectorsize; struct bio *last = bio_list->tail; @@ -1181,7 +1181,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, /* put a new bio on the list */ bio = bio_alloc(stripe->dev->bdev, max(bio_max_len >> PAGE_SHIFT, 1UL), - opf, GFP_NOFS); + op, GFP_NOFS); bio->bi_iter.bi_sector = disk_start >> 9; bio->bi_private = rbio; From 67c0f556302cfcdb5b5fb7933afa08cb1de75b36 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:17 -0700 Subject: [PATCH 142/178] fs/ext4: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for variables that represent request flags. Cc: Theodore Ts'o Cc: Baokun Li Cc: Ye Bin Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-52-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/ext4/ext4.h | 8 ++++---- fs/ext4/fast_commit.c | 2 +- fs/ext4/super.c | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 75b8d81b2469..29fc575a4eb6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3058,14 +3058,14 @@ extern unsigned int ext4_list_backups(struct super_block *sb, /* super.c */ extern struct buffer_head *ext4_sb_bread(struct super_block *sb, - sector_t block, int op_flags); + sector_t block, blk_opf_t op_flags); extern struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, sector_t block); -extern void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, +extern void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io); -extern int ext4_read_bh(struct buffer_head *bh, int op_flags, +extern int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io); -extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait); +extern int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait); extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block); extern int ext4_seq_options_show(struct seq_file *seq, void *offset); extern int ext4_calculate_overhead(struct super_block *sb); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0df5482c6c1c..eb4c8ad1bb61 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -658,7 +658,7 @@ void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t star static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) { - int write_flags = REQ_SYNC; + blk_opf_t write_flags = REQ_SYNC; struct buffer_head *bh = 
EXT4_SB(sb)->s_fc_bh; /* Add REQ_FUA | REQ_PREFLUSH only its tail */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 24922184b622..2c68dec63e54 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -159,7 +159,7 @@ MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) -static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, +static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { /* @@ -174,7 +174,7 @@ static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, submit_bh(REQ_OP_READ | op_flags, bh); } -void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, +void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { BUG_ON(!buffer_locked(bh)); @@ -186,7 +186,7 @@ void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, __ext4_read_bh(bh, op_flags, end_io); } -int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) +int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { BUG_ON(!buffer_locked(bh)); @@ -203,7 +203,7 @@ int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) return -EIO; } -int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) +int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { if (trylock_buffer(bh)) { if (wait) @@ -227,8 +227,8 @@ int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) * return. */ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, - sector_t block, int op_flags, - gfp_t gfp) + sector_t block, + blk_opf_t op_flags, gfp_t gfp) { struct buffer_head *bh; int ret; @@ -248,7 +248,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, } struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, - int op_flags) + blk_opf_t op_flags) { return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE); } From 7649c873c16a384d447f7dbf9b153e333159f914 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:18 -0700 Subject: [PATCH 143/178] fs/f2fs: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. 
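Note that blk_opf_t is a __bitwise type, so any place that hands the value
to a plain-integer interface, such as __print_flags() in the f2fs
tracepoints, must strip the annotation with a __force cast. A sketch of
the two sides of the split (field names as in struct f2fs_io_info; the
surrounding code is illustrative):

	struct f2fs_io_info fio = {
		.op	  = REQ_OP_WRITE,		/* enum req_op */
		.op_flags = REQ_SYNC | REQ_PRIO,	/* blk_opf_t */
	};

	/* Printing or tracing needs the annotation stripped explicitly: */
	pr_debug("op_flags %x\n", (__force u32)fio.op_flags);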
Cc: Jaegeuk Kim Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-53-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/f2fs/data.c | 11 ++++++----- fs/f2fs/f2fs.h | 6 +++--- fs/f2fs/node.c | 2 +- fs/f2fs/segment.c | 2 +- include/trace/events/f2fs.h | 22 +++++++++++----------- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7fcbcf979737..5c13ee321940 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -387,11 +387,11 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) return 0; } -static unsigned int f2fs_io_flags(struct f2fs_io_info *fio) +static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1; unsigned int fua_flag, meta_flag, io_flag; - unsigned int op_flags = 0; + blk_opf_t op_flags = 0; if (fio->op != REQ_OP_WRITE) return 0; @@ -999,7 +999,7 @@ out: } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages, unsigned op_flag, + unsigned nr_pages, blk_opf_t op_flag, pgoff_t first_idx, bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -1047,7 +1047,8 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, - block_t blkaddr, int op_flags, bool for_write) + block_t blkaddr, blk_opf_t op_flags, + bool for_write) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct bio *bio; @@ -1181,7 +1182,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index) } struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - int op_flags, bool for_write) + blk_opf_t op_flags, bool for_write) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d9bbecd008d2..868170b72de9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1183,8 +1183,8 @@ struct f2fs_io_info { nid_t ino; /* inode number */ enum page_type type; /* contains DATA/NODE/META/META_FLUSH */ enum temp_type temp; /* contains HOT/WARM/COLD */ - int op; /* contains REQ_OP_ */ - int op_flags; /* req_flag_bits */ + enum req_op op; /* contains REQ_OP_ */ + blk_opf_t op_flags; /* req_flag_bits */ block_t new_blkaddr; /* new block address to be written */ block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ @@ -3741,7 +3741,7 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - int op_flags, bool for_write); + blk_opf_t op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, bool for_write); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index cf6f7fc83c08..04a145f1dcfc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1327,7 +1327,7 @@ fail: * 0: f2fs_put_page(page, 0) * LOCKED_PAGE or error: f2fs_put_page(page, 1) */ -static int read_node_page(struct page *page, int op_flags) +static int read_node_page(struct page *page, blk_opf_t op_flags) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 874c1b9c41a2..c7afc588cf26 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ 
-1082,7 +1082,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ? &(dcc->fstrim_list) : &(dcc->wait_list); - int flag = dpolicy->sync ? REQ_SYNC : 0; + blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0; block_t lstart, start, len, total_len; int err = 0; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 513e889ef8aa..f1e922237736 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -66,7 +66,7 @@ TRACE_DEFINE_ENUM(CP_RESIZE); #define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_PREFLUSH | REQ_FUA) -#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS) +#define F2FS_BIO_FLAG_MASK(t) (__force u32)((t) & F2FS_OP_FLAGS) #define show_bio_type(op,op_flags) show_bio_op(op), \ show_bio_op_flags(op_flags) @@ -75,12 +75,12 @@ TRACE_DEFINE_ENUM(CP_RESIZE); #define show_bio_op_flags(flags) \ __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ - { REQ_RAHEAD, "R" }, \ - { REQ_SYNC, "S" }, \ - { REQ_META, "M" }, \ - { REQ_PRIO, "P" }, \ - { REQ_PREFLUSH, "PF" }, \ - { REQ_FUA, "FUA" }) + { (__force u32)REQ_RAHEAD, "R" }, \ + { (__force u32)REQ_SYNC, "S" }, \ + { (__force u32)REQ_META, "M" }, \ + { (__force u32)REQ_PRIO, "P" }, \ + { (__force u32)REQ_PREFLUSH, "PF" }, \ + { (__force u32)REQ_FUA, "FUA" }) #define show_data_type(type) \ __print_symbolic(type, \ @@ -1036,8 +1036,8 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __field(pgoff_t, index) __field(block_t, old_blkaddr) __field(block_t, new_blkaddr) - __field(int, op) - __field(int, op_flags) + __field(enum req_op, op) + __field(blk_opf_t, op_flags) __field(int, temp) __field(int, type) ), @@ -1092,8 +1092,8 @@ DECLARE_EVENT_CLASS(f2fs__bio, TP_STRUCT__entry( __field(dev_t, dev) __field(dev_t, target) - __field(int, op) - __field(int, op_flags) + __field(enum req_op, op) + __field(blk_opf_t, op_flags) __field(int, type) __field(sector_t, sector) __field(unsigned int, size) From 67688c08b7e5e9f8f945b22fb460a31ed3feb880 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:19 -0700 Subject: [PATCH 144/178] fs/gfs2: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the first two gfs2_submit_bhs() arguments into a single argument. 
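Combining the two arguments loses no information, since the operation can
always be recovered from the combined value with REQ_OP_MASK, e.g.
(generic sketch, not a hunk from this patch):

	blk_opf_t opf = REQ_OP_READ | REQ_META | REQ_PRIO;
	enum req_op op = opf & REQ_OP_MASK;	/* yields REQ_OP_READ */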
Reviewed-by: Andreas Gruenbacher Cc: Bob Peterson Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-54-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/gfs2/log.c | 4 ++-- fs/gfs2/log.h | 2 +- fs/gfs2/lops.c | 4 ++-- fs/gfs2/lops.h | 2 +- fs/gfs2/meta_io.c | 9 ++++----- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f0ee3ff6f9a8..eec4159b08aa 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -823,7 +823,7 @@ void gfs2_flush_revokes(struct gfs2_sbd *sdp) void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, - int op_flags) + blk_opf_t op_flags) { struct gfs2_log_header *lh; u32 hash, crc; @@ -905,7 +905,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, static void log_write_header(struct gfs2_sbd *sdp, u32 flags) { - int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; + blk_opf_t op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC; enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state); gfs2_assert_withdraw(sdp, (state != SFS_FROZEN)); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index fc905c2af53c..653cffcbf869 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -82,7 +82,7 @@ extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr, unsigned int *extra_revokes); extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, u64 seq, u32 tail, u32 lblock, u32 flags, - int op_flags); + blk_opf_t op_flags); extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6ba51cbb94cf..90a2d7bc91c4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -238,7 +238,7 @@ static void gfs2_end_log_write(struct bio *bio) * there is no pending bio, then this is a no-op. 
*/ -void gfs2_log_submit_bio(struct bio **biop, int opf) +void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf) { struct bio *bio = *biop; if (bio) { @@ -292,7 +292,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, */ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno, - struct bio **biop, int op, + struct bio **biop, enum req_op op, bio_end_io_t *end_io, bool flush) { struct bio *bio = *biop; diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index f707601597dc..1412ffba1d44 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h @@ -16,7 +16,7 @@ extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn); extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, struct page *page, unsigned size, unsigned offset, u64 blkno); -extern void gfs2_log_submit_bio(struct bio **biop, int opf); +extern void gfs2_log_submit_bio(struct bio **biop, blk_opf_t opf); extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, bool keep_cache); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 3570739f005d..7e70e0ba5a6c 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -34,7 +34,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); + blk_opf_t write_flags = REQ_META | REQ_PRIO | wbc_to_write_flags(wbc); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -217,14 +217,13 @@ static void gfs2_meta_read_endio(struct bio *bio) * Submit several consecutive buffer head I/O requests as a single bio I/O * request. (See submit_bh_wbc.) */ -static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[], - int num) +static void gfs2_submit_bhs(blk_opf_t opf, struct buffer_head *bhs[], int num) { while (num > 0) { struct buffer_head *bh = *bhs; struct bio *bio; - bio = bio_alloc(bh->b_bdev, num, op | op_flags, GFP_NOIO); + bio = bio_alloc(bh->b_bdev, num, opf, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); while (num > 0) { bh = *bhs; @@ -288,7 +287,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } } - gfs2_submit_bhs(REQ_OP_READ, REQ_META | REQ_PRIO, bhs, num); + gfs2_submit_bhs(REQ_OP_READ | REQ_META | REQ_PRIO, bhs, num); if (!(flags & DIO_WAIT)) return 0; From c85f99929ea66c357199b6a3fe958745e1190f5a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:20 -0700 Subject: [PATCH 145/178] fs/hfsplus: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the last two hfsplus_submit_bio() arguments into a single argument. 
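With the combined argument, callers OR any flags directly into the
operation; for example the superblock writeback call becomes (mirroring
the super.c hunk below):

	error = hfsplus_submit_bio(sb,
				   sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
				   sbi->s_vhdr_buf, NULL,
				   REQ_OP_WRITE | REQ_SYNC);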
Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-55-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/hfsplus/hfsplus_fs.h | 2 +- fs/hfsplus/part_tbl.c | 5 ++--- fs/hfsplus/super.c | 4 ++-- fs/hfsplus/wrapper.c | 12 ++++++------ 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 396e73aa0961..a5db2e3b2980 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -525,7 +525,7 @@ int hfsplus_compare_dentry(const struct dentry *dentry, unsigned int len, /* wrapper.c */ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, void *buf, - void **data, int op, int op_flags); + void **data, blk_opf_t opf); int hfsplus_read_wrapper(struct super_block *sb); /* diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 63164ebc52fa..9ec21664eda6 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c @@ -112,8 +112,7 @@ static int hfs_parse_new_pmap(struct super_block *sb, void *buf, if ((u8 *)pm - (u8 *)buf >= buf_size) { res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK + i, - buf, (void **)&pm, REQ_OP_READ, - 0); + buf, (void **)&pm, REQ_OP_READ); if (res) return res; } @@ -137,7 +136,7 @@ int hfs_part_find(struct super_block *sb, return -ENOMEM; res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK, - buf, &data, REQ_OP_READ, 0); + buf, &data, REQ_OP_READ); if (res) goto out; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 8479add998b5..122ed89ebf9f 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -221,7 +221,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, - sbi->s_vhdr_buf, NULL, REQ_OP_WRITE, + sbi->s_vhdr_buf, NULL, REQ_OP_WRITE | REQ_SYNC); if (!error) error = error2; @@ -230,7 +230,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) error2 = hfsplus_submit_bio(sb, sbi->part_start + sbi->sect_count - 2, - sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE, + sbi->s_backup_vhdr_buf, NULL, REQ_OP_WRITE | REQ_SYNC); if (!error) error2 = error; diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 0b8ad6586df5..0b791adf02e5 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -45,8 +45,9 @@ struct hfsplus_wd { * will work correctly. 
*/ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, - void *buf, void **data, int op, int op_flags) + void *buf, void **data, blk_opf_t opf) { + const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; int ret = 0; u64 io_size; @@ -63,10 +64,10 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, offset = start & (io_size - 1); sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1); - bio = bio_alloc(sb->s_bdev, 1, op | op_flags, GFP_NOIO); + bio = bio_alloc(sb->s_bdev, 1, opf, GFP_NOIO); bio->bi_iter.bi_sector = sector; - if (op != WRITE && data) + if (op != REQ_OP_WRITE && data) *data = (u8 *)buf + offset; while (io_size > 0) { @@ -184,7 +185,7 @@ int hfsplus_read_wrapper(struct super_block *sb) reread: error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, sbi->s_vhdr_buf, (void **)&sbi->s_vhdr, - REQ_OP_READ, 0); + REQ_OP_READ); if (error) goto out_free_backup_vhdr; @@ -216,8 +217,7 @@ reread: error = hfsplus_submit_bio(sb, part_start + part_size - 2, sbi->s_backup_vhdr_buf, - (void **)&sbi->s_backup_vhdr, REQ_OP_READ, - 0); + (void **)&sbi->s_backup_vhdr, REQ_OP_READ); if (error) goto out_free_backup_vhdr; From dbd4eb8148f694ae300fe9682b505acf53053f6e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:21 -0700 Subject: [PATCH 146/178] fs/iomap: Use the new blk_opf_t type Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for the combination of a request operation and request flags. Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-56-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/iomap/direct-io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 5d098adba443..18a3d9357dce 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -52,7 +52,7 @@ struct iomap_dio { }; static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter, - struct iomap_dio *dio, unsigned short nr_vecs, unsigned int opf) + struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf) { if (dio->dops && dio->dops->bio_set) return bio_alloc_bioset(iter->iomap.bdev, nr_vecs, opf, @@ -212,10 +212,10 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio, * mapping, and whether or not we want FUA. Note that we can end up * clearing the WRITE_FUA flag in the dio request. 
*/ -static inline unsigned int iomap_dio_bio_opflags(struct iomap_dio *dio, +static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio, const struct iomap *iomap, bool use_fua) { - unsigned int opflags = REQ_SYNC | REQ_IDLE; + blk_opf_t opflags = REQ_SYNC | REQ_IDLE; if (!(dio->flags & IOMAP_DIO_WRITE)) { WARN_ON_ONCE(iomap->flags & IOMAP_F_ZONE_APPEND); @@ -244,7 +244,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, unsigned int fs_block_size = i_blocksize(inode), pad; loff_t length = iomap_length(iter); loff_t pos = iter->pos; - unsigned int bio_opf; + blk_opf_t bio_opf; struct bio *bio; bool need_zeroout = false; bool use_fua = false; From 6669797b0dd41ced457760b6e1014fdda8ce19ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:22 -0700 Subject: [PATCH 147/178] fs/jbd2: Fix the documentation of the jbd2_write_superblock() callers Commit 2a222ca992c3 ("fs: have submit_bh users pass in op and flags separately") renamed the jbd2_write_superblock() 'write_op' argument into 'write_flags'. Propagate this change to the jbd2_write_superblock() callers. Additionally, change the type of 'write_flags' into blk_opf_t. Cc: Mike Christie Cc: Theodore Ts'o Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-57-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/jbd2/journal.c | 15 ++++++++------- include/linux/jbd2.h | 2 +- include/trace/events/jbd2.h | 12 ++++++------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 07e6aaf7e213..2a1b9da7c3e3 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1604,7 +1604,7 @@ static int journal_reset(journal_t *journal) * This function expects that the caller will have locked the journal * buffer head, and will return with it unlocked */ -static int jbd2_write_superblock(journal_t *journal, int write_flags) +static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) { struct buffer_head *bh = journal->j_sb_buffer; journal_superblock_t *sb = journal->j_superblock; @@ -1661,13 +1661,14 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) * @journal: The journal to update. * @tail_tid: TID of the new transaction at the tail of the log * @tail_block: The first block of the transaction at the tail of the log - * @write_op: With which operation should we write the journal sb + * @write_flags: Flags for the journal sb write operation * * Update a journal's superblock information about log tail and write it to * disk, waiting for the IO to complete. */ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, - unsigned long tail_block, int write_op) + unsigned long tail_block, + blk_opf_t write_flags) { journal_superblock_t *sb = journal->j_superblock; int ret; @@ -1687,7 +1688,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, sb->s_sequence = cpu_to_be32(tail_tid); sb->s_start = cpu_to_be32(tail_block); - ret = jbd2_write_superblock(journal, write_op); + ret = jbd2_write_superblock(journal, write_flags); if (ret) goto out; @@ -1704,12 +1705,12 @@ out: /** * jbd2_mark_journal_empty() - Mark on disk journal as empty. * @journal: The journal to update. - * @write_op: With which operation should we write the journal sb + * @write_flags: Flags for the journal sb write operation * * Update a journal's dynamic superblock fields to show that journal is empty. * Write updated superblock to disk waiting for IO to complete. 
*/ -static void jbd2_mark_journal_empty(journal_t *journal, int write_op) +static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags) { journal_superblock_t *sb = journal->j_superblock; bool had_fast_commit = false; @@ -1735,7 +1736,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) had_fast_commit = true; } - jbd2_write_superblock(journal, write_op); + jbd2_write_superblock(journal, write_flags); if (had_fast_commit) jbd2_set_feature_fast_commit(journal); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index e79d6e0b14e8..dc1724131300 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1557,7 +1557,7 @@ extern int jbd2_journal_wipe (journal_t *, int); extern int jbd2_journal_skip_recovery (journal_t *); extern void jbd2_journal_update_sb_errno(journal_t *); extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t, - unsigned long, int); + unsigned long, blk_opf_t); extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index a4dfe005983d..99f783c384bb 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -355,22 +355,22 @@ TRACE_EVENT(jbd2_update_log_tail, TRACE_EVENT(jbd2_write_superblock, - TP_PROTO(journal_t *journal, int write_op), + TP_PROTO(journal_t *journal, blk_opf_t write_flags), - TP_ARGS(journal, write_op), + TP_ARGS(journal, write_flags), TP_STRUCT__entry( __field( dev_t, dev ) - __field( int, write_op ) + __field( blk_opf_t, write_flags ) ), TP_fast_assign( __entry->dev = journal->j_fs_dev->bd_dev; - __entry->write_op = write_op; + __entry->write_flags = write_flags; ), - TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->write_op) + TP_printk("dev %d,%d write_flags %x", MAJOR(__entry->dev), + MINOR(__entry->dev), (__force u32)__entry->write_flags) ); TRACE_EVENT(jbd2_lock_buffer_stall, From 5d12ce77e1e677590de13468fe1a497388de3a9e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:23 -0700 Subject: [PATCH 148/178] fs/nfs: Use enum req_op where appropriate Improve static type checking by using enum req_op for request operations. Rename an 'rw' argument into 'op' since that name is typically used for request operations. This patch does not change any functionality. Note: REQ_OP_READ = READ = 0 and REQ_OP_WRITE = WRITE = 1. 
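That identity is why the old code could pass the plain READ and WRITE
constants without misbehaving; they merely bypassed type checking. With
the enum req_op parameter, a plain integer at a call site is something
sparse can flag (illustrative old-style call, not code from this patch):

	bio = do_add_page_to_bio(bio, npg, WRITE, isect, page, &map, &be,
				 bl_end_io_write, par, 0, &pg_len);
	/* 'WRITE' is a plain int; REQ_OP_WRITE is the typed value the
	 * converted call sites use instead. */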
Cc: Trond Myklebust Cc: Anna Schumaker Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-58-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/nfs/blocklayout/blocklayout.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 79a8b451791f..943aeea1eb16 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -121,7 +121,7 @@ static bool offset_in_map(u64 offset, struct pnfs_block_dev_map *map) } static struct bio * -do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, +do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect, struct page *page, struct pnfs_block_dev_map *map, struct pnfs_block_extent *be, bio_end_io_t end_io, struct parallel_io *par, unsigned int offset, int *len) @@ -131,7 +131,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, u64 disk_addr, end; dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__, - npg, rw, (unsigned long long)isect, offset, *len); + npg, (__force u32)op, (unsigned long long)isect, offset, *len); /* translate to device offset */ isect += be->be_v_offset; @@ -154,7 +154,7 @@ do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, retry: if (!bio) { - bio = bio_alloc(map->bdev, bio_max_segs(npg), rw, GFP_NOIO); + bio = bio_alloc(map->bdev, bio_max_segs(npg), op, GFP_NOIO); bio->bi_iter.bi_sector = disk_addr >> SECTOR_SHIFT; bio->bi_end_io = end_io; bio->bi_private = par; @@ -291,7 +291,7 @@ bl_read_pagelist(struct nfs_pgio_header *header) } else { bio = do_add_page_to_bio(bio, header->page_array.npages - i, - READ, + REQ_OP_READ, isect, pages[i], &map, &be, bl_end_io_read, par, pg_offset, &pg_len); @@ -420,9 +420,8 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync) pg_len = PAGE_SIZE; bio = do_add_page_to_bio(bio, header->page_array.npages - i, - WRITE, isect, pages[i], &map, &be, - bl_end_io_write, par, - 0, &pg_len); + REQ_OP_WRITE, isect, pages[i], &map, + &be, bl_end_io_write, par, 0, &pg_len); if (IS_ERR(bio)) { header->pnfs_error = PTR_ERR(bio); bio = NULL; From ed4512590bd5839f8ea9eef1626b0f4db626b1d1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:24 -0700 Subject: [PATCH 149/178] fs/nilfs2: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the 'mode' and 'mode_flags' arguments of nilfs_btnode_submit_block into a single argument 'opf'. 
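The callee can still tell the readahead case apart by testing the flag
bits, so the merge loses nothing (sketch, condensed from the btnode.c
hunk below; variable names are illustrative):

	err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr,
					REQ_OP_READ | REQ_RAHEAD,
					&bh, &submit_ptr);

	/* ...and inside the callee: */
	if (opf & REQ_RAHEAD) {
		if (!trylock_buffer(bh))
			return -EBUSY;	/* readahead is best effort */
	} else {			/* plain REQ_OP_READ */
		lock_buffer(bh);
	}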
Reviewed-by: Ryusuke Konishi Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-59-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/nilfs2/btnode.c | 8 ++++---- fs/nilfs2/btnode.h | 4 ++-- fs/nilfs2/btree.c | 6 +++--- fs/nilfs2/gcinode.c | 5 ++--- fs/nilfs2/mdt.c | 19 ++++++++++--------- include/trace/events/nilfs2.h | 4 ++-- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 5c39efbf733f..e74fda212620 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -70,7 +70,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, int mode, int mode_flags, + sector_t pblocknr, blk_opf_t opf, struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; @@ -103,13 +103,13 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } } - if (mode_flags & REQ_RAHEAD) { + if (opf & REQ_RAHEAD) { if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { err = -EBUSY; /* internal code */ brelse(bh); goto out_locked; } - } else { /* mode == READ */ + } else { /* opf == REQ_OP_READ */ lock_buffer(bh); } if (buffer_uptodate(bh)) { @@ -122,7 +122,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode | mode_flags, bh); + submit_bh(opf, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index bd5544e63a01..4bc5612dff94 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -34,8 +34,8 @@ void nilfs_init_btnc_inode(struct inode *btnc_inode); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); -int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, - int, struct buffer_head **, sector_t *); +int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, + blk_opf_t, struct buffer_head **, sector_t *); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index f544c22fff78..9f4d9432d38a 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -477,7 +477,7 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, sector_t submit_ptr = 0; int ret; - ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, + ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { if (ret != -EEXIST) @@ -495,8 +495,8 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); ret = nilfs_btnode_submit_block(btnc, ptr2, 0, - REQ_OP_READ, REQ_RAHEAD, - &ra_bh, &submit_ptr); + REQ_OP_READ | REQ_RAHEAD, + &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); else if (ret != -EBUSY) diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 847def8af315..b0d22ff24b67 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -129,9 +129,8 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode; int ret; - ret = 
nilfs_btnode_submit_block(btnc_inode->i_mapping, - vbn ? : pbn, pbn, REQ_OP_READ, 0, - out_bh, &pbn); + ret = nilfs_btnode_submit_block(btnc_inode->i_mapping, vbn ? : pbn, pbn, + REQ_OP_READ, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 66e8811c2528..cbf4fa60eea2 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -111,8 +111,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, } static int -nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, - int mode, int mode_flags, struct buffer_head **out_bh) +nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, + struct buffer_head **out_bh) { struct buffer_head *bh; __u64 blknum = 0; @@ -126,12 +126,12 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, if (buffer_uptodate(bh)) goto out; - if (mode_flags & REQ_RAHEAD) { + if (opf & REQ_RAHEAD) { if (!trylock_buffer(bh)) { ret = -EBUSY; goto failed_bh; } - } else /* mode == READ */ + } else /* opf == REQ_OP_READ */ lock_buffer(bh); if (buffer_uptodate(bh)) { @@ -148,10 +148,11 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode | mode_flags, bh); + submit_bh(opf, bh); ret = 0; - trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, + opf & REQ_OP_MASK); out: get_bh(bh); *out_bh = bh; @@ -172,7 +173,7 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; int err; - err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, 0, &first_bh); + err = nilfs_mdt_submit_block(inode, block, REQ_OP_READ, &first_bh); if (err == -EEXIST) /* internal code */ goto out; @@ -182,8 +183,8 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (readahead) { blkoff = block + 1; for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, REQ_OP_READ, - REQ_RAHEAD, &bh); + err = nilfs_mdt_submit_block(inode, blkoff, + REQ_OP_READ | REQ_RAHEAD, &bh); if (likely(!err || err == -EEXIST)) brelse(bh); else if (err != -EBUSY) diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 84ee31fc04cc..8efc6236f57c 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -192,7 +192,7 @@ TRACE_EVENT(nilfs2_mdt_submit_block, TP_PROTO(struct inode *inode, unsigned long ino, unsigned long blkoff, - int mode), + enum req_op mode), TP_ARGS(inode, ino, blkoff, mode), @@ -200,7 +200,7 @@ TRACE_EVENT(nilfs2_mdt_submit_block, __field(struct inode *, inode) __field(unsigned long, ino) __field(unsigned long, blkoff) - __field(int, mode) + __field(enum req_op, mode) ), TP_fast_assign( From ce6b5315883448fbecfaca43b95d3bf2ed1d008c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:25 -0700 Subject: [PATCH 150/178] fs/ntfs3: Use enum req_op where appropriate Improve static type checking by using enum req_op instead of u32 for block layer request operations. 
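An illustrative call with the typed last argument (sketch, not a hunk
from this patch):

	err = ntfs_bio_pages(sbi, run, pages, nr_pages, vbo, bytes,
			     REQ_OP_WRITE);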
Cc: Konstantin Komarov Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-60-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/ntfs3/fsntfs.c | 2 +- fs/ntfs3/ntfs_fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 3de5700a9b83..1835e35199c2 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -1448,7 +1448,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, */ int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, - u32 op) + enum req_op op) { int err = 0; struct bio *new, *bio = NULL; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8de129a6419b..3a8abf13143e 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -617,7 +617,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, struct ntfs_buffers *nb, int sync); int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, - u32 op); + enum req_op op); int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run); int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u64 *lbo, u64 *bytes); From 61ba06c7069bfe1d2b66ab474ce0d6b4f5419d64 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:26 -0700 Subject: [PATCH 151/178] fs/ocfs2: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the last two o2hb_setup_one_bio() arguments into a single argument. Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-61-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/ocfs2/cluster/heartbeat.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5f83c0c0918c..b13d344d40b6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -501,8 +501,7 @@ static void o2hb_bio_end_io(struct bio *bio) static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, struct o2hb_bio_wait_ctxt *wc, unsigned int *current_slot, - unsigned int max_slots, int op, - int op_flags) + unsigned int max_slots, blk_opf_t opf) { int len, current_page; unsigned int vec_len, vec_start; @@ -516,7 +515,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, * GFP_KERNEL that the local node can get fenced. It would be * nicest if we could pre-allocate these bios and avoid this * all together. 
*/ - bio = bio_alloc(reg->hr_bdev, 16, op | op_flags, GFP_ATOMIC); + bio = bio_alloc(reg->hr_bdev, 16, opf, GFP_ATOMIC); if (!bio) { mlog(ML_ERROR, "Could not alloc slots BIO!\n"); bio = ERR_PTR(-ENOMEM); @@ -564,7 +563,7 @@ static int o2hb_read_slots(struct o2hb_region *reg, while(current_slot < max_slots) { bio = o2hb_setup_one_bio(reg, &wc, ¤t_slot, max_slots, - REQ_OP_READ, 0); + REQ_OP_READ); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); @@ -596,8 +595,8 @@ static int o2hb_issue_node_write(struct o2hb_region *reg, slot = o2nm_this_node(); - bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, REQ_OP_WRITE, - REQ_SYNC); + bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1, + REQ_OP_WRITE | REQ_SYNC); if (IS_ERR(bio)) { status = PTR_ERR(bio); mlog_errno(status); From 568e34ed7339e357f73c8e1ae5cc4f4595805357 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:27 -0700 Subject: [PATCH 152/178] PM: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for variables that represent request flags. Combine the first two hib_submit_io() arguments into a single argument. Acked-by: Rafael J. Wysocki Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-62-bvanassche@acm.org Signed-off-by: Jens Axboe --- kernel/power/swap.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 91fffdd2c7fb..277434b6c0bf 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -269,15 +269,14 @@ static void hib_end_io(struct bio *bio) bio_put(bio); } -static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, - struct hib_bio_batch *hb) +static int hib_submit_io(blk_opf_t opf, pgoff_t page_off, void *addr, + struct hib_bio_batch *hb) { struct page *page = virt_to_page(addr); struct bio *bio; int error = 0; - bio = bio_alloc(hib_resume_bdev, 1, op | op_flags, - GFP_NOIO | __GFP_HIGH); + bio = bio_alloc(hib_resume_bdev, 1, opf, GFP_NOIO | __GFP_HIGH); bio->bi_iter.bi_sector = page_off * (PAGE_SIZE >> 9); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { @@ -317,8 +316,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) { int error; - hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, - swsusp_header, NULL); + hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL); if (!memcmp("SWAP-SPACE",swsusp_header->sig, 10) || !memcmp("SWAPSPACE2",swsusp_header->sig, 10)) { memcpy(swsusp_header->orig_sig,swsusp_header->sig, 10); @@ -331,7 +329,7 @@ static int mark_swapfiles(struct swap_map_handle *handle, unsigned int flags) swsusp_header->flags = flags; if (flags & SF_CRC32_MODE) swsusp_header->crc32 = handle->crc32; - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { pr_err("Swap header not found!\n"); @@ -408,7 +406,7 @@ static int write_page(void *buf, sector_t offset, struct hib_bio_batch *hb) } else { src = buf; } - return hib_submit_io(REQ_OP_WRITE, REQ_SYNC, offset, src, hb); + return hib_submit_io(REQ_OP_WRITE | REQ_SYNC, offset, src, hb); } static void release_swap_writer(struct swap_map_handle *handle) @@ -1003,7 +1001,7 @@ static int get_swap_reader(struct swap_map_handle *handle, return -ENOMEM; } - error = hib_submit_io(REQ_OP_READ, 0, 
offset, tmp->map, NULL); + error = hib_submit_io(REQ_OP_READ, offset, tmp->map, NULL); if (error) { release_swap_reader(handle); return error; @@ -1027,7 +1025,7 @@ static int swap_read_page(struct swap_map_handle *handle, void *buf, offset = handle->cur->entries[handle->k]; if (!offset) return -EFAULT; - error = hib_submit_io(REQ_OP_READ, 0, offset, buf, hb); + error = hib_submit_io(REQ_OP_READ, offset, buf, hb); if (error) return error; if (++handle->k >= MAP_PAGE_ENTRIES) { @@ -1526,8 +1524,7 @@ int swsusp_check(void) if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); - error = hib_submit_io(REQ_OP_READ, 0, - swsusp_resume_block, + error = hib_submit_io(REQ_OP_READ, swsusp_resume_block, swsusp_header, NULL); if (error) goto put; @@ -1535,7 +1532,7 @@ int swsusp_check(void) if (!memcmp(HIBERNATE_SIG, swsusp_header->sig, 10)) { memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10); /* Reset swap signature now */ - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { @@ -1586,11 +1583,11 @@ int swsusp_unmark(void) { int error; - hib_submit_io(REQ_OP_READ, 0, swsusp_resume_block, - swsusp_header, NULL); + hib_submit_io(REQ_OP_READ, swsusp_resume_block, + swsusp_header, NULL); if (!memcmp(HIBERNATE_SIG,swsusp_header->sig, 10)) { memcpy(swsusp_header->sig,swsusp_header->orig_sig, 10); - error = hib_submit_io(REQ_OP_WRITE, REQ_SYNC, + error = hib_submit_io(REQ_OP_WRITE | REQ_SYNC, swsusp_resume_block, swsusp_header, NULL); } else { From d03025aef8676e826b69f8e3ec9bb59a5ad0c31d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:28 -0700 Subject: [PATCH 153/178] fs/xfs: Use the enum req_op and blk_opf_t types Improve static type checking by using the enum req_op type for variables that represent a request operation and the new blk_opf_t type for the combination of a request operation with request flags. Reviewed-by: Darrick J. Wong Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-63-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/xfs/xfs_bio_io.c | 2 +- fs/xfs/xfs_buf.c | 4 ++-- fs/xfs/xfs_linux.h | 2 +- fs/xfs/xfs_log_recover.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index ae4345b37621..fe21c76f75b8 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -15,7 +15,7 @@ xfs_rw_bdev( sector_t sector, unsigned int count, char *data, - unsigned int op) + enum req_op op) { unsigned int is_vmalloc = is_vmalloc_addr(data); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index bf4e60871068..5e8f40d8c052 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1416,7 +1416,7 @@ xfs_buf_ioapply_map( int map, int *buf_offset, int *count, - int op) + blk_opf_t op) { int page_index; unsigned int total_nr_pages = bp->b_page_count; @@ -1493,7 +1493,7 @@ _xfs_buf_ioapply( struct xfs_buf *bp) { struct blk_plug plug; - int op; + blk_opf_t op; int offset; int size; int i; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index cb9105d667db..f9878021e7d0 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -196,7 +196,7 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) } int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, - char *data, unsigned int op); + char *data, enum req_op op); #define ASSERT_ALWAYS(expr) \ (likely(expr) ? 
(void)0 : assfail(NULL, #expr, __FILE__, __LINE__)) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5f7e4e6e33ce..940c8107cbd4 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -122,7 +122,7 @@ xlog_do_io( xfs_daddr_t blk_no, unsigned int nbblks, char *data, - unsigned int op) + enum req_op op) { int error; From e46b5970496705127f9ae494c66e0242773097e8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:29 -0700 Subject: [PATCH 154/178] fs/zonefs: Use the enum req_op type for tracing request operations Improve static type checking by using the enum req_op type for request operations. Reviewed-by: Johannes Thumshirn Reviewed-by: Damien Le Moal Cc: Naohiro Aota Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-64-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/zonefs/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/zonefs/trace.h b/fs/zonefs/trace.h index 21501da764bd..42edcfd393ed 100644 --- a/fs/zonefs/trace.h +++ b/fs/zonefs/trace.h @@ -25,7 +25,7 @@ TRACE_EVENT(zonefs_zone_mgmt, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(int, op) + __field(enum req_op, op) __field(sector_t, sector) __field(sector_t, nr_sectors) ), From f54541403b2f51d98aa65472ddb021b1ef7d1eed Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jul 2022 11:47:34 -0700 Subject: [PATCH 155/178] fs/buffer: Fix the ll_rw_block() kernel-doc header Bring the ll_rw_block() kernel-doc header again in sync with the function prototype. Reported-by: Stephen Rothwell Cc: Alexander Viro Cc: Jan Kara Cc: Stephen Rothwell Fixes: 1420c4a549bf ("fs/buffer: Combine two submit_bh() and ll_rw_block() arguments") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220715184735.2326034-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- fs/buffer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index af53569930bb..82de136b83bb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3049,14 +3049,13 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @op: whether to %READ or %WRITE - * @op_flags: req_flag_bits + * @opf: block layer request operation and flags. * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE. - * @op_flags contains flags modifying the detailed I/O behavior, most notably + * @opf contains flags modifying the detailed I/O behavior, most notably * %REQ_RAHEAD. * * This function drops any buffer that it cannot get a lock on (with the From 020e3618cc81abf11fe6bffaac27861ff94707ce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Jul 2022 11:47:35 -0700 Subject: [PATCH 156/178] blktrace: Fix the blk_fill_rwbs() kernel-doc header Reflect recent changes in the blk_fill_rwbs() kernel-doc header. 
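For reference, typical usage of the documented function (sketch; RWBS_LEN
comes from blktrace_api.h):

	char rwbs[RWBS_LEN];

	blk_fill_rwbs(rwbs, REQ_OP_WRITE | REQ_SYNC | REQ_FUA);
	/* rwbs now holds a string like "WFS": one character for the
	 * operation, then one per flag. */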
Reported-by: Stephen Rothwell Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Li Zefan Cc: Chaitanya Kulkarni Cc: Stephen Rothwell Fixes: 919dbca8670d ("blktrace: Use the new blk_opf_t type") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220715184735.2326034-3-bvanassche@acm.org Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 150058f5daa9..7f5eb295fe19 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1873,11 +1873,11 @@ out: /** * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string. * @rwbs: buffer to be filled - * @op: REQ_OP_XXX for the tracepoint + * @opf: request operation type (REQ_OP_XXX) and flags for the tracepoint * * Description: - * Maps the REQ_OP_XXX to character and fills the buffer provided by the - * caller with resulting string. + * Maps each request operation and flag to a single character and fills the + * buffer provided by the caller with resulting string. * **/ void blk_fill_rwbs(char *rwbs, blk_opf_t opf) From f2450f8a2c1ec3e88d6674f747b913aa5f21fa59 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 16 Jul 2022 17:53:44 +0800 Subject: [PATCH 157/178] ublk_drv: fix build warning with -Wmaybe-uninitialized and one sparse warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After applying -Wmaybe-uninitialized manually, two build warnings are triggered: drivers/block/ublk_drv.c:940:11: warning: ‘io’ may be used uninitialized [-Wmaybe-uninitialized] 940 | io->flags &= ~UBLK_IO_FLAG_ACTIVE; drivers/block/ublk_drv.c: In function ‘ublk_ctrl_uring_cmd’: drivers/block/ublk_drv.c:1531:9: warning: ‘ret’ may be used uninitialized [-Wmaybe-uninitialized] Fix the 1st one by removing 'io->flags &= ~UBLK_IO_FLAG_ACTIVE;' which isn't needed since the function always return successfully after setting this flag. Fix the 2nd one by always initializing 'ret'. Also fix another sparse warning of 'sparse: sparse: incorrect type in return expression' by changing return type of ublk_setup_iod(). 
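The 'ret' warning has the classic early-goto shape, where a failure
branch jumps past the first assignment (sketch of the pattern, not the
exact driver code):

	int ret;	/* declared without an initializer */

	ub = ublk_get_device_from_id(header->dev_id);
	if (!ub)
		goto out;	/* jumps past the first assignment */
	ret = -EINVAL;
	/* ... queue validation and affinity copy ... */
out:
	io_uring_cmd_done(cmd, ret, 0);	/* 'ret' may be uninitialized */

Initializing at the declaration, 'int ret = -EINVAL;', covers the early
exit as well.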
Reported-by: kernel test robot Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220716095344.222674-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f10c4319dc1f..2c1b01d7f27d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -407,7 +407,7 @@ static inline unsigned int ublk_req_build_flags(struct request *req) return flags; } -static int ublk_setup_iod(struct ublk_queue *ubq, struct request *req) +static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req) { struct ublksrv_io_desc *iod = ublk_get_iod(ubq, req->tag); struct ublk_io *io = &ubq->ios[req->tag]; @@ -937,7 +937,6 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; out: - io->flags &= ~UBLK_IO_FLAG_ACTIVE; io_uring_cmd_done(cmd, ret, 0); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); @@ -1299,13 +1298,12 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) struct ublk_device *ub; unsigned long queue; unsigned int retlen; - int ret; + int ret = -EINVAL; ub = ublk_get_device_from_id(header->dev_id); if (!ub) goto out; - ret = -EINVAL; queue = header->data[0]; if (queue >= ub->dev_info.nr_hw_queues) goto out; From f50e5d670c622349277a46996a70386cc3661b10 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 18 Jul 2022 12:24:08 +0800 Subject: [PATCH 158/178] ublk_drv: fix missing error return code in ublk_add_dev() If blk_mq_init_queue() fails, it should return error code in ublk_add_dev() Fixes: cebbe577cb17 ("ublk_drv: fix request queue leak") Signed-off-by: Yang Yingliang Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220718042408.3132835-1-yangyingliang@huawei.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2c1b01d7f27d..663626167c0d 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1169,8 +1169,10 @@ static int ublk_add_dev(struct ublk_device *ub) goto out_deinit_queues; ub->ub_queue = blk_mq_init_queue(&ub->tag_set); - if (IS_ERR(ub->ub_queue)) + if (IS_ERR(ub->ub_queue)) { + err = PTR_ERR(ub->ub_queue); goto out_cleanup_tags; + } ub->ub_queue->queuedata = ub; disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue, From 6b1439d203a3c3d7adcf31ba70734eb95f8fa02d Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 18 Jul 2022 09:54:31 +0800 Subject: [PATCH 159/178] ublk_drv: remove unneeded semicolon Eliminate the following coccicheck warnings: ./drivers/block/ublk_drv.c:1467:2-3: Unneeded semicolon ./drivers/block/ublk_drv.c:1528:2-3: Unneeded semicolon Reported-by: Abaci Robot Signed-off-by: Yang Li Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220718015431.40185-1-yang.lee@linux.alibaba.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 663626167c0d..42afab25864f 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1465,7 +1465,7 @@ static int ublk_ctrl_cmd_validate(struct io_uring_cmd *cmd, return -EINVAL; if (!header->addr) return -EINVAL; - }; + } return 0; } @@ -1526,7 +1526,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, break; default: 
break; - }; + } out: io_uring_cmd_done(cmd, ret, 0); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", From d276a22314c2bad9136c5e0b09eb3c8a560e1161 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jul 2022 08:30:13 +0200 Subject: [PATCH 160/178] ublk: remove UBLK_IO_F_INTEGRITY The ublk protocol has no mechanism to actually transfer the integrity metadata, so don't define this flag, which requires that an integrity payload is attached to a bio. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220718063013.335531-1-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 --- include/uapi/linux/ublk_cmd.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 42afab25864f..796d8230fb60 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -389,9 +389,6 @@ static inline unsigned int ublk_req_build_flags(struct request *req) if (req->cmd_flags & REQ_META) flags |= UBLK_IO_F_META; - if (req->cmd_flags & REQ_INTEGRITY) - flags |= UBLK_IO_F_INTEGRITY; - if (req->cmd_flags & REQ_FUA) flags |= UBLK_IO_F_FUA; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index a3f5e7c21807..d6879eea2fde 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -106,7 +106,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_TRANSPORT (1U << 9) #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) -#define UBLK_IO_F_INTEGRITY (1U << 12) #define UBLK_IO_F_FUA (1U << 13) #define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) From fe3333f6953848f1c24e91a1cf70eed026dc3a86 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 18 Jul 2022 14:14:09 +0300 Subject: [PATCH 161/178] ublk_drv: fix an IS_ERR() vs NULL check The blk_mq_alloc_disk_for_queue() doesn't return error pointers; it returns NULL on error. Fixes: cebbe577cb17 ("ublk_drv: fix request queue leak") Signed-off-by: Dan Carpenter Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/YtVAgedTsQVK1oTM@kili Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 796d8230fb60..b90481b295a7 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1174,8 +1174,8 @@ static int ublk_add_dev(struct ublk_device *ub) disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue, &ublk_bio_compl_lkclass); - if (IS_ERR(disk)) { - err = PTR_ERR(disk); + if (!disk) { + err = -ENOMEM; goto out_free_request_queue; } From bf14fad19ffbb3d37a1bb1324f966973e7d4a7b6 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 19 Jul 2022 00:08:51 +0800 Subject: [PATCH 162/178] mmc: fix disk/queue leak in case of adding disk failure In case of adding disk failure, the disk needs to be released, otherwise the disk and its queue are leaked.
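The rule the mmc fix follows is general for blk-mq drivers: once a gendisk exists, a failed device_add_disk() has to be unwound with put_disk(), which drops the disk and its request queue together, before the tag set is freed. A minimal sketch of that error-path shape (demo_dev and its fields are hypothetical, and this only builds in-tree against the kernel headers):

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>

    struct demo_dev {                       /* hypothetical driver state */
        struct device *parent;
        struct gendisk *disk;
        struct blk_mq_tag_set tag_set;
    };

    static int demo_add(struct demo_dev *dd)
    {
        int ret = device_add_disk(dd->parent, dd->disk, NULL);

        if (ret)
            goto err_put_disk;
        return 0;

    err_put_disk:
        put_disk(dd->disk);                 /* releases disk and queue */
        blk_mq_free_tag_set(&dd->tag_set);
        return ret;
    }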
Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220718160851.312972-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/mmc/core/block.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index bda6c67ce93f..e08e22f0a7c5 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2505,10 +2505,11 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, dev_set_drvdata(&card->dev, md); ret = device_add_disk(md->parent, md->disk, mmc_disk_attr_groups); if (ret) - goto err_cleanup_queue; + goto err_put_disk; return md; - err_cleanup_queue: + err_put_disk: + put_disk(md->disk); blk_mq_free_tag_set(&md->queue.tag_set); err_kfree: kfree(md); From 14a6e2eb7df5c7897c15b109cba29ab0c4a791b6 Mon Sep 17 00:00:00 2001 From: Jinke Han Date: Wed, 20 Jul 2022 17:36:16 +0800 Subject: [PATCH 163/178] block: don't allow the same type rq_qos add more than once In our test of iocost, we encountered some list add/del corruptions of inner_walk list in ioc_timer_fn. The reason is a race between two writers: cpu 0 and cpu 1 both run ioc_qos_write(), both see q_to_ioc(queue) return NULL, and each then kzalloc()s its own ioc and calls rq_qos_add(q, rqos). When the io.cost.qos file is written by two cpus concurrently, rq_qos may be added to one disk twice. In that case, there will be two iocs enabled and running on one disk. They own different iocgs on their active list. In the ioc_timer_fn function, because the iocgs from the two iocs have the same root iocg, the root iocg's walk_list may be overwritten by each other and this leads to list add/del corruptions in building or destroying the inner_walk list. So far, the blk-rq-qos framework has assumed at most one instance of each rq_qos type per queue. This patch makes this assumption explicit and also fixes the crash above (see the sketch below). Signed-off-by: Jinke Han Reviewed-by: Muchun Song Acked-by: Tejun Heo Cc: Link: https://lore.kernel.org/r/20220720093616.70584-1-hanjinke.666@bytedance.com Signed-off-by: Jens Axboe --- block/blk-iocost.c | 20 +++++++++++++------- block/blk-iolatency.c | 18 +++++++++++------- block/blk-rq-qos.h | 11 ++++++++++- block/blk-wbt.c | 12 +++++++++++- 4 files changed, 45 insertions(+), 16 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index b7082f2aed9c..7936e5f5821c 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2886,15 +2886,21 @@ static int blk_iocost_init(struct request_queue *q) * called before policy activation completion, can't assume that the * target bio has an iocg associated and need to test for NULL iocg.
*/ - rq_qos_add(q, rqos); + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free_ioc; + ret = blkcg_activate_policy(q, &blkcg_policy_iocost); - if (ret) { - rq_qos_del(q, rqos); - free_percpu(ioc->pcpu_stat); - kfree(ioc); - return ret; - } + if (ret) + goto err_del_qos; return 0; + +err_del_qos: + rq_qos_del(q, rqos); +err_free_ioc: + free_percpu(ioc->pcpu_stat); + kfree(ioc); + return ret; } static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 79745c6d8e15..e285152345a2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -771,19 +771,23 @@ int blk_iolatency_init(struct request_queue *q) rqos->ops = &blkcg_iolatency_ops; rqos->q = q; - rq_qos_add(q, rqos); - + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free; ret = blkcg_activate_policy(q, &blkcg_policy_iolatency); - if (ret) { - rq_qos_del(q, rqos); - kfree(blkiolat); - return ret; - } + if (ret) + goto err_qos_del; timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; + +err_qos_del: + rq_qos_del(q, rqos); +err_free: + kfree(blkiolat); + return ret; } static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 0e46052b018a..08b856570ad1 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) init_waitqueue_head(&rq_wait->wait); } -static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) { /* * No IO can be in-flight when adding rqos, so freeze queue, which @@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_freeze_queue(q); spin_lock_irq(&q->queue_lock); + if (rq_qos_id(q, rqos->id)) + goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; spin_unlock_irq(&q->queue_lock); @@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } + + return 0; +ebusy: + spin_unlock_irq(&q->queue_lock); + blk_mq_unfreeze_queue(q); + return -EBUSY; + } static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index f2e4bf1dca47..a9982000b667 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q) { struct rq_wb *rwb; int i; + int ret; rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) @@ -846,7 +847,10 @@ int wbt_init(struct request_queue *q) /* * Assign rwb and add the stats callback. */ - rq_qos_add(q, &rwb->rqos); + ret = rq_qos_add(q, &rwb->rqos); + if (ret) + goto err_free; + blk_stat_add_callback(q, rwb->cb); rwb->min_lat_nsec = wbt_default_latency_nsec(q); @@ -855,4 +859,10 @@ int wbt_init(struct request_queue *q) wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); return 0; + +err_free: + blk_stat_free_callback(rwb->cb); + kfree(rwb); + return ret; + } From c229686b26ee5b371bdd7e637f2d18f191861c3e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:09 +0200 Subject: [PATCH 164/178] ublk: add a MAINTAINERS entry Make get_maintainers.pl work for ublk. 
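Returning to the rq_qos fix above: its essence is that the duplicate check and the list insertion now happen under the same lock, so a concurrent writer gets -EBUSY instead of a second installation. A runnable userspace analogue of that contract (qos_add() and the chain are stand-ins, not the kernel API):

    #include <pthread.h>
    #include <errno.h>

    struct qos { int id; struct qos *next; };

    static struct qos *chain;
    static pthread_mutex_t chain_lock = PTHREAD_MUTEX_INITIALIZER;

    static int qos_add(struct qos *new)
    {
        struct qos *q;
        int ret = 0;

        pthread_mutex_lock(&chain_lock);
        for (q = chain; q; q = q->next) {
            if (q->id == new->id) {
                ret = -EBUSY;   /* lost the race: already installed */
                goto unlock;
            }
        }
        new->next = chain;
        chain = new;
    unlock:
        pthread_mutex_unlock(&chain_lock);
        return ret;
    }

    int main(void)
    {
        static struct qos a = { .id = 1 }, b = { .id = 1 };

        /* second add of the same id must fail; exits 0 on success */
        return qos_add(&a) != 0 || qos_add(&b) != -EBUSY;
    }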
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-2-hch@lst.de Signed-off-by: Jens Axboe --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index fe5daf141501..8a4bbca9f28f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20415,6 +20415,13 @@ F: Documentation/filesystems/ubifs-authentication.rst F: Documentation/filesystems/ubifs.rst F: fs/ubifs/ +UBLK USERSPACE BLOCK DRIVER +M: Ming Lei +L: linux-block@vger.kernel.org +S: Maintained +F: drivers/block/ublk_drv.c +F: include/uapi/linux/ublk_cmd.h + UCLINUX (M68KNOMMU AND COLDFIRE) M: Greg Ungerer L: linux-m68k@lists.linux-m68k.org From 5f8bcc837a9640ba4bf5e7b1d7f9b254ea029f47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:10 +0200 Subject: [PATCH 165/178] ublk: remove UBLK_IO_F_PREFLUSH REQ_PREFLUSH is turned into REQ_OP_FLUSH by the flush state machine and thus never seen by a blk-mq based driver. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-3-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 3 --- include/uapi/linux/ublk_cmd.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b90481b295a7..07913b5bccd9 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -392,9 +392,6 @@ static inline unsigned int ublk_req_build_flags(struct request *req) if (req->cmd_flags & REQ_FUA) flags |= UBLK_IO_F_FUA; - if (req->cmd_flags & REQ_PREFLUSH) - flags |= UBLK_IO_F_PREFLUSH; - if (req->cmd_flags & REQ_NOUNMAP) flags |= UBLK_IO_F_NOUNMAP; diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index d6879eea2fde..917580b34198 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -107,7 +107,6 @@ struct ublksrv_ctrl_dev_info { #define UBLK_IO_F_FAILFAST_DRIVER (1U << 10) #define UBLK_IO_F_META (1U << 11) #define UBLK_IO_F_FUA (1U << 13) -#define UBLK_IO_F_PREFLUSH (1U << 14) #define UBLK_IO_F_NOUNMAP (1U << 15) #define UBLK_IO_F_SWAP (1U << 16) From 49d686cceed2e3148ba2bd2dec7e09b86eba0337 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:11 +0200 Subject: [PATCH 166/178] ublk: remove the empty open and release block device operations No need to define empty versions; they can just be left out. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-4-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 07913b5bccd9..deabcb23ae2a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -208,19 +208,8 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) PAGE_SIZE); } -static int ublk_open(struct block_device *bdev, fmode_t mode) -{ - return 0; -} - -static void ublk_release(struct gendisk *disk, fmode_t mode) -{ -} - static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, - .open = ublk_open, - .release = ublk_release, }; #define UBLK_MAX_PIN_PAGES 32 From fa362045564ea7641e7d48295b54f4eb4df689ea Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:12 +0200 Subject: [PATCH 167/178] ublk: simplify ublk_ch_open and ublk_ch_release fops->open and fops->release are always paired.
Use simple atomic bit ops to indicate if the device is opened instead of a count that can only be 0 and 1 and a useless cmpxchg loop in ublk_ch_release. Also don't bother clearing file->private_data as the file is about to be freed anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-5-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index deabcb23ae2a..1f7bbbc3276a 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -125,7 +125,8 @@ struct ublk_device { struct cdev cdev; struct device cdev_dev; - atomic_t ch_open_cnt; +#define UB_STATE_OPEN (1 << 0) + unsigned long state; int ub_number; struct mutex mutex; @@ -647,21 +648,17 @@ static int ublk_ch_open(struct inode *inode, struct file *filp) struct ublk_device *ub = container_of(inode->i_cdev, struct ublk_device, cdev); - if (atomic_cmpxchg(&ub->ch_open_cnt, 0, 1) == 0) { - filp->private_data = ub; - return 0; - } - return -EBUSY; + if (test_and_set_bit(UB_STATE_OPEN, &ub->state)) + return -EBUSY; + filp->private_data = ub; + return 0; } static int ublk_ch_release(struct inode *inode, struct file *filp) { struct ublk_device *ub = filp->private_data; - while (atomic_cmpxchg(&ub->ch_open_cnt, 1, 0) != 1) - cpu_relax(); - - filp->private_data = NULL; + clear_bit(UB_STATE_OPEN, &ub->state); return 0; } From 34d8f2bea52928626aefd6f7e0ba7e69f67c8e62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:13 +0200 Subject: [PATCH 168/178] ublk: cleanup ublk_ctrl_uring_cmd Move all per-command work into the per-command ublk_ctrl_* helpers instead of being split over those, ublk_ctrl_cmd_validate, and the main ublk_ctrl_uring_cmd handler. To facilitate that, the old ublk_ctrl_stop_dev function that just contained two function calls is folded into both callers.
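The open-once logic is easiest to see outside the kernel. A userspace analogue with C11 atomics (ch_open()/ch_release() are illustrative names; atomic_flag plays the role of the UB_STATE_OPEN bit):

    #include <stdatomic.h>
    #include <errno.h>

    static atomic_flag opened = ATOMIC_FLAG_INIT;

    int ch_open(void)
    {
        if (atomic_flag_test_and_set(&opened))
            return -EBUSY;          /* already open */
        return 0;
    }

    void ch_release(void)
    {
        /* open/release pair up, so a plain clear is enough */
        atomic_flag_clear(&opened);
    }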
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20220721130916.1869719-6-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 238 +++++++++++++++++++-------------------- 1 file changed, 118 insertions(+), 120 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 1f7bbbc3276a..2032d677b9f1 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -813,13 +813,6 @@ static void ublk_stop_dev(struct ublk_device *ub) cancel_delayed_work_sync(&ub->monitor_work); } -static int ublk_ctrl_stop_dev(struct ublk_device *ub) -{ - ublk_stop_dev(ub); - cancel_work_sync(&ub->stop_work); - return 0; -} - static inline bool ublk_queue_ready(struct ublk_queue *ubq) { return ubq->nr_io_ready == ubq->q_depth; @@ -1205,8 +1198,8 @@ out_destroy_dev: static void ublk_remove(struct ublk_device *ub) { - ublk_ctrl_stop_dev(ub); - + ublk_stop_dev(ub); + cancel_work_sync(&ub->stop_work); cdev_device_del(&ub->cdev, &ub->cdev_dev); put_device(&ub->cdev_dev); } @@ -1227,36 +1220,45 @@ static struct ublk_device *ublk_get_device_from_id(int idx) return ub; } -static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) +static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; - int ret = -EINVAL; int ublksrv_pid = (int)header->data[0]; unsigned long dev_blocks = header->data[1]; + struct ublk_device *ub; + int ret = -EINVAL; if (ublksrv_pid <= 0) - return ret; + return -EINVAL; + + ub = ublk_get_device_from_id(header->dev_id); + if (!ub) + return -EINVAL; wait_for_completion_interruptible(&ub->completion); schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); mutex_lock(&ub->mutex); - if (!disk_live(ub->ub_disk)) { - /* We may get disk size updated */ - if (dev_blocks) { - ub->dev_info.dev_blocks = dev_blocks; - ublk_update_capacity(ub); - } - ub->dev_info.ublksrv_pid = ublksrv_pid; - ret = add_disk(ub->ub_disk); - if (!ret) - ub->dev_info.state = UBLK_S_DEV_LIVE; - } else { + if (disk_live(ub->ub_disk)) { ret = -EEXIST; + goto out_unlock; } - mutex_unlock(&ub->mutex); + /* We may get disk size updated */ + if (dev_blocks) { + ub->dev_info.dev_blocks = dev_blocks; + ublk_update_capacity(ub); + } + ub->dev_info.ublksrv_pid = ublksrv_pid; + ret = add_disk(ub->ub_disk); + if (ret) + goto out_unlock; + + ub->dev_info.state = UBLK_S_DEV_LIVE; +out_unlock: + mutex_unlock(&ub->mutex); + ublk_put_device(ub); return ret; } @@ -1281,6 +1283,13 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) unsigned long queue; unsigned int retlen; int ret = -EINVAL; + + if (header->len * BITS_PER_BYTE < nr_cpu_ids) + return -EINVAL; + if (header->len & (sizeof(unsigned long)-1)) + return -EINVAL; + if (!header->addr) + return -EINVAL; ub = ublk_get_device_from_id(header->dev_id); if (!ub) @@ -1311,38 +1320,64 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) return ret; } -static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_dev_info *info, - void __user *argp, int idx) +static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) { + pr_devel("%s: dev id %d flags %llx\n", __func__, + info->dev_id, info->flags[0]); + pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", + info->nr_hw_queues, info->queue_depth, + info->block_size, info->dev_blocks); +} + +static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd 
*header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + void __user *argp = (void __user *)(unsigned long)header->addr; + struct ublksrv_ctrl_dev_info info; struct ublk_device *ub; - int ret; + int ret = -EINVAL; + + if (header->len < sizeof(info) || !header->addr) + return -EINVAL; + if (header->queue_id != (u16)-1) { + pr_warn("%s: queue_id is wrong %x\n", + __func__, header->queue_id); + return -EINVAL; + } + if (copy_from_user(&info, argp, sizeof(info))) + return -EFAULT; + ublk_dump_dev_info(&info); + if (header->dev_id != info.dev_id) { + pr_warn("%s: dev id not match %u %u\n", + __func__, header->dev_id, info.dev_id); + return -EINVAL; + } ret = mutex_lock_killable(&ublk_ctl_mutex); if (ret) return ret; - ub = __ublk_create_dev(idx); - if (!IS_ERR_OR_NULL(ub)) { - memcpy(&ub->dev_info, info, sizeof(*info)); - - /* update device id */ - ub->dev_info.dev_id = ub->ub_number; - - ret = ublk_add_dev(ub); - if (!ret) { - if (copy_to_user(argp, &ub->dev_info, sizeof(*info))) { - ublk_remove(ub); - ret = -EFAULT; - } - } - } else { - if (IS_ERR(ub)) - ret = PTR_ERR(ub); - else - ret = -ENOMEM; + ub = __ublk_create_dev(header->dev_id); + if (IS_ERR(ub)) { + ret = PTR_ERR(ub); + goto out_unlock; } - mutex_unlock(&ublk_ctl_mutex); + memcpy(&ub->dev_info, &info, sizeof(info)); + + /* update device id */ + ub->dev_info.dev_id = ub->ub_number; + + ret = ublk_add_dev(ub); + if (ret) + goto out_unlock; + + if (copy_to_user(argp, &ub->dev_info, sizeof(info))) { + ublk_remove(ub); + ret = -EFAULT; + } +out_unlock: + mutex_unlock(&ublk_ctl_mutex); return ret; } @@ -1386,16 +1421,6 @@ static int ublk_ctrl_del_dev(int idx) return ret; } - -static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) -{ - pr_devel("%s: dev id %d flags %llx\n", __func__, - info->dev_id, info->flags[0]); - pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", - info->nr_hw_queues, info->queue_depth, - info->block_size, info->dev_blocks); -} - static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; @@ -1405,59 +1430,47 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd) header->data[0], header->addr, header->len); } -static int ublk_ctrl_cmd_validate(struct io_uring_cmd *cmd, - struct ublksrv_ctrl_dev_info *info) +static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; - u32 cmd_op = cmd->cmd_op; - void __user *argp = (void __user *)(unsigned long)header->addr; + struct ublk_device *ub; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + ub = ublk_get_device_from_id(header->dev_id); + if (!ub) + return -EINVAL; - switch (cmd_op) { - case UBLK_CMD_GET_DEV_INFO: - if (header->len < sizeof(*info) || !header->addr) - return -EINVAL; - break; - case UBLK_CMD_ADD_DEV: - if (header->len < sizeof(*info) || !header->addr) - return -EINVAL; - if (copy_from_user(info, argp, sizeof(*info)) != 0) - return -EFAULT; - ublk_dump_dev_info(info); - if (header->dev_id != info->dev_id) { - printk(KERN_WARNING "%s: cmd %x, dev id not match %u %u\n", - __func__, cmd_op, header->dev_id, - info->dev_id); - return -EINVAL; - } - if (header->queue_id != (u16)-1) { - printk(KERN_WARNING "%s: cmd %x queue_id is wrong %x\n", - __func__, cmd_op, header->queue_id); - return -EINVAL; - } - break; - case UBLK_CMD_GET_QUEUE_AFFINITY: - if ((header->len * BITS_PER_BYTE) < nr_cpu_ids) - return -EINVAL; - if (header->len & (sizeof(unsigned long)-1)) - return 
-EINVAL; - if (!header->addr) - return -EINVAL; - } + ublk_stop_dev(ub); + cancel_work_sync(&ub->stop_work); + ublk_put_device(ub); return 0; } +static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd) +{ + struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; + void __user *argp = (void __user *)(unsigned long)header->addr; + struct ublk_device *ub; + int ret = 0; + + if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr) + return -EINVAL; + + ub = ublk_get_device_from_id(header->dev_id); + if (!ub) + return -EINVAL; + + if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info))) + ret = -EFAULT; + ublk_put_device(ub); + + return ret; +} + static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; - void __user *argp = (void __user *)(unsigned long)header->addr; - struct ublksrv_ctrl_dev_info info; - u32 cmd_op = cmd->cmd_op; - struct ublk_device *ub; int ret = -EINVAL; ublk_ctrl_cmd_dump(cmd); @@ -1465,38 +1478,23 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (!(issue_flags & IO_URING_F_SQE128)) goto out; - ret = ublk_ctrl_cmd_validate(cmd, &info); - if (ret) + ret = -EPERM; + if (!capable(CAP_SYS_ADMIN)) goto out; ret = -ENODEV; - switch (cmd_op) { + switch (cmd->cmd_op) { case UBLK_CMD_START_DEV: - ub = ublk_get_device_from_id(header->dev_id); - if (ub) { - ret = ublk_ctrl_start_dev(ub, cmd); - ublk_put_device(ub); - } + ret = ublk_ctrl_start_dev(cmd); break; case UBLK_CMD_STOP_DEV: - ub = ublk_get_device_from_id(header->dev_id); - if (ub) { - ret = ublk_ctrl_stop_dev(ub); - ublk_put_device(ub); - } + ret = ublk_ctrl_stop_dev(cmd); break; case UBLK_CMD_GET_DEV_INFO: - ub = ublk_get_device_from_id(header->dev_id); - if (ub) { - if (copy_to_user(argp, &ub->dev_info, sizeof(info))) - ret = -EFAULT; - else - ret = 0; - ublk_put_device(ub); - } + ret = ublk_ctrl_get_dev_info(cmd); break; case UBLK_CMD_ADD_DEV: - ret = ublk_ctrl_add_dev(&info, argp, header->dev_id); + ret = ublk_ctrl_add_dev(cmd); break; case UBLK_CMD_DEL_DEV: ret = ublk_ctrl_del_dev(header->dev_id); From cfee7e4de2870017a4cbfdcf2d17329cc025b742 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:14 +0200 Subject: [PATCH 169/178] ublk: fold __ublk_create_dev into ublk_ctrl_add_dev Fold __ublk_create_dev into its only caller to avoid the packing and unpacking of the return value into an ERR_PTR. 
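The motivation shows up clearly in miniature: when a helper has exactly one caller, encoding an errno into a pointer only for the caller to decode it again is pure noise. A self-contained userspace sketch, with deliberately simplified stand-ins for the kernel's ERR_PTR()/PTR_ERR()/IS_ERR() macros:

    #include <errno.h>
    #include <stdlib.h>

    #define ERR_PTR(err) ((void *)(long)(err))
    #define PTR_ERR(ptr) ((long)(ptr))
    #define IS_ERR(ptr)  ((unsigned long)(ptr) >= (unsigned long)-4095)

    struct dev { int nr; };

    /* before: pack the errno into the returned pointer ... */
    static struct dev *create_dev(void)
    {
        struct dev *d = malloc(sizeof(*d));

        if (!d)
            return ERR_PTR(-ENOMEM);
        return d;
    }

    /* ... only for the single caller to unpack it right away; after
       folding, the caller simply keeps the int from each step */
    int add_dev(struct dev **out)
    {
        struct dev *d = create_dev();

        if (IS_ERR(d))
            return PTR_ERR(d);
        *out = d;
        return 0;
    }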
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Reviewed-by: ZiyangZhang Link: https://lore.kernel.org/r/20220721130916.1869719-7-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 2032d677b9f1..b8ac7b508029 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1017,23 +1017,6 @@ static int __ublk_alloc_dev_number(struct ublk_device *ub, int idx) return err; } -static struct ublk_device *__ublk_create_dev(int idx) -{ - struct ublk_device *ub = NULL; - int ret; - - ub = kzalloc(sizeof(*ub), GFP_KERNEL); - if (!ub) - return ERR_PTR(-ENOMEM); - - ret = __ublk_alloc_dev_number(ub, idx); - if (ret < 0) { - kfree(ub); - return ERR_PTR(ret); - } - return ub; -} - static void __ublk_destroy_dev(struct ublk_device *ub) { spin_lock(&ublk_idr_lock); @@ -1357,9 +1340,14 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) if (ret) return ret; - ub = __ublk_create_dev(header->dev_id); - if (IS_ERR(ub)) { - ret = PTR_ERR(ub); + ret = -ENOMEM; + ub = kzalloc(sizeof(*ub), GFP_KERNEL); + if (!ub) + goto out_unlock; + + ret = __ublk_alloc_dev_number(ub, header->dev_id); + if (ret < 0) { + kfree(ub); goto out_unlock; } From c50061f0f1a90df72aaa87eb17c459fa77952ad1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:15 +0200 Subject: [PATCH 170/178] ublk: rewrite ublk_ctrl_get_queue_affinity to not rely on hctx->cpumask Looking at the hctxs and cpumap is not safe without at the very least an RCU reference. It also requires the queue to be set up before starting the device, which leads to rather awkward life time rules. Instead rewrite ublk_ctrl_get_queue_affinity to just build the cpumask directly from the mq_map in the tag set, similar to how hctx->cpumask is built.
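The derivation the rewrite relies on can be modelled in a few lines: given the cpu-to-queue table (the analogue of tag_set.map[HCTX_TYPE_DEFAULT].mq_map), a queue's affinity is every cpu whose entry matches. A runnable toy with an assumed 8-cpu mapping:

    #include <stdio.h>

    #define NR_CPUS 8

    /* assumed stand-in for tag_set.map[HCTX_TYPE_DEFAULT].mq_map */
    static const unsigned int mq_map[NR_CPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };

    static unsigned long queue_cpumask(unsigned int queue)
    {
        unsigned long mask = 0;
        unsigned int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
            if (mq_map[cpu] == queue)
                mask |= 1UL << cpu;
        return mask;
    }

    int main(void)
    {
        printf("queue 1 cpus: %#lx\n", queue_cpumask(1)); /* 0xc */
        return 0;
    }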
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-8-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 55 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index b8ac7b508029..748247c0435b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1245,26 +1245,15 @@ out_unlock: return ret; } -static struct blk_mq_hw_ctx *ublk_get_hw_queue(struct ublk_device *ub, - unsigned int index) -{ - struct blk_mq_hw_ctx *hctx; - unsigned long i; - - queue_for_each_hw_ctx(ub->ub_queue, hctx, i) - if (hctx->queue_num == index) - return hctx; - return NULL; -} - static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) { struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; void __user *argp = (void __user *)(unsigned long)header->addr; - struct blk_mq_hw_ctx *hctx; struct ublk_device *ub; + cpumask_var_t cpumask; unsigned long queue; unsigned int retlen; + unsigned int i; int ret = -EINVAL; if (header->len * BITS_PER_BYTE < nr_cpu_ids) @@ -1276,30 +1265,34 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd) ub = ublk_get_device_from_id(header->dev_id); if (!ub) - goto out; + return -EINVAL; queue = header->data[0]; if (queue >= ub->dev_info.nr_hw_queues) - goto out; - hctx = ublk_get_hw_queue(ub, queue); - if (!hctx) - goto out; + goto out_put_device; + ret = -ENOMEM; + if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL)) + goto out_put_device; + + for_each_possible_cpu(i) { + if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[i] == queue) + cpumask_set_cpu(i, cpumask); + } + + ret = -EFAULT; retlen = min_t(unsigned short, header->len, cpumask_size()); - if (copy_to_user(argp, hctx->cpumask, retlen)) { - ret = -EFAULT; - goto out; - } - if (retlen != header->len) { - if (clear_user(argp + retlen, header->len - retlen)) { - ret = -EFAULT; - goto out; - } - } + if (copy_to_user(argp, cpumask, retlen)) + goto out_free_cpumask; + if (retlen != header->len && + clear_user(argp + retlen, header->len - retlen)) + goto out_free_cpumask; + ret = 0; - out: - if (ub) - ublk_put_device(ub); +out_free_cpumask: + free_cpumask_var(cpumask); +out_put_device: + ublk_put_device(ub); return ret; } From 6d9e6dfdf3b207701471f364121c67eefb000682 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 15:09:16 +0200 Subject: [PATCH 171/178] ublk: defer disk allocation Defer allocating the gendisk and request_queue until UBLK_CMD_START_DEV is called. This avoids funky life times where a disk is allocated and then can be added and removed multiple times, which has never been supported by the block layer. 
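In sketch form, the START_DEV path after this change allocates, configures, references and publishes the disk in one place. The demo_* names and the nr_sectors field are illustrative, error handling is trimmed to the two cases that matter, and this only builds in-tree:

    static int demo_start_dev(struct demo_dev *dd)
    {
        struct gendisk *disk = blk_mq_alloc_disk(&dd->tag_set, dd);
        int ret;

        if (IS_ERR(disk))
            return PTR_ERR(disk);
        disk->fops = &demo_fops;
        disk->private_data = dd;
        set_capacity(disk, dd->nr_sectors);

        get_device(&dd->cdev_dev);  /* dropped again in ->free_disk() */
        ret = add_disk(disk);
        if (ret)
            put_disk(disk);         /* also runs ->free_disk() */
        return ret;
    }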
Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220721130916.1869719-9-hch@lst.de Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 131 ++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 76 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 748247c0435b..81bfdda0f1af 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -112,7 +112,6 @@ struct ublk_queue { struct ublk_device { struct gendisk *ub_disk; - struct request_queue *ub_queue; char *__queues; @@ -126,6 +125,7 @@ struct ublk_device { struct device cdev_dev; #define UB_STATE_OPEN (1 << 0) +#define UB_STATE_USED (1 << 1) unsigned long state; int ub_number; @@ -156,8 +156,6 @@ static DEFINE_MUTEX(ublk_ctl_mutex); static struct miscdevice ublk_misc; -static struct lock_class_key ublk_bio_compl_lkclass; - static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) { if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && @@ -209,8 +207,17 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) PAGE_SIZE); } +static void ublk_free_disk(struct gendisk *disk) +{ + struct ublk_device *ub = disk->private_data; + + clear_bit(UB_STATE_USED, &ub->state); + put_device(&ub->cdev_dev); +} + static const struct block_device_operations ub_fops = { .owner = THIS_MODULE, + .free_disk = ublk_free_disk, }; #define UBLK_MAX_PIN_PAGES 32 @@ -801,13 +808,15 @@ static void ublk_cancel_dev(struct ublk_device *ub) static void ublk_stop_dev(struct ublk_device *ub) { mutex_lock(&ub->mutex); - if (!disk_live(ub->ub_disk)) + if (ub->dev_info.state != UBLK_S_DEV_LIVE) goto unlock; del_gendisk(ub->ub_disk); ub->dev_info.state = UBLK_S_DEV_DEAD; ub->dev_info.ublksrv_pid = -1; ublk_cancel_dev(ub); + put_disk(ub->ub_disk); + ub->ub_disk = NULL; unlock: mutex_unlock(&ub->mutex); cancel_delayed_work_sync(&ub->monitor_work); @@ -1033,12 +1042,7 @@ static void ublk_cdev_rel(struct device *dev) { struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev); - blk_mq_destroy_queue(ub->ub_queue); - - put_disk(ub->ub_disk); - blk_mq_free_tag_set(&ub->tag_set); - ublk_deinit_queues(ub); __ublk_destroy_dev(ub); @@ -1078,31 +1082,24 @@ static void ublk_stop_work_fn(struct work_struct *work) ublk_stop_dev(ub); } -static void ublk_update_capacity(struct ublk_device *ub) +/* align maximum I/O size to PAGE_SIZE */ +static void ublk_align_max_io_size(struct ublk_device *ub) { - unsigned int max_rq_bytes; + unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift; - /* make max request buffer size aligned with PAGE_SIZE */ - max_rq_bytes = round_down(ub->dev_info.rq_max_blocks << - ub->bs_shift, PAGE_SIZE); - ub->dev_info.rq_max_blocks = max_rq_bytes >> ub->bs_shift; - - set_capacity(ub->ub_disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9)); + ub->dev_info.rq_max_blocks = + round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift; } -/* add disk & cdev, cleanup everything in case of failure */ +/* add tag_set & cdev, cleanup everything in case of failure */ static int ublk_add_dev(struct ublk_device *ub) { - struct gendisk *disk; int err = -ENOMEM; - int bsize; /* We are not ready to support zero copy */ ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; - bsize = ub->dev_info.block_size; - ub->bs_shift = ilog2(bsize); - + ub->bs_shift = ilog2(ub->dev_info.block_size); ub->dev_info.nr_hw_queues = min_t(unsigned int, ub->dev_info.nr_hw_queues, nr_cpu_ids); @@ -1119,59 +1116,16 @@ static int ublk_add_dev(struct ublk_device 
*ub) ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ub->tag_set.driver_data = ub; - err = blk_mq_alloc_tag_set(&ub->tag_set); if (err) goto out_deinit_queues; - ub->ub_queue = blk_mq_init_queue(&ub->tag_set); - if (IS_ERR(ub->ub_queue)) { - err = PTR_ERR(ub->ub_queue); - goto out_cleanup_tags; - } - ub->ub_queue->queuedata = ub; - - disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue, - &ublk_bio_compl_lkclass); - if (!disk) { - err = -ENOMEM; - goto out_free_request_queue; - } - - blk_queue_logical_block_size(ub->ub_queue, bsize); - blk_queue_physical_block_size(ub->ub_queue, bsize); - blk_queue_io_min(ub->ub_queue, bsize); - - blk_queue_max_hw_sectors(ub->ub_queue, ub->dev_info.rq_max_blocks << - (ub->bs_shift - 9)); - - ub->ub_queue->limits.discard_granularity = PAGE_SIZE; - - blk_queue_max_discard_sectors(ub->ub_queue, UINT_MAX >> 9); - blk_queue_max_write_zeroes_sectors(ub->ub_queue, UINT_MAX >> 9); - - ublk_update_capacity(ub); - - disk->fops = &ub_fops; - disk->private_data = ub; - disk->queue = ub->ub_queue; - sprintf(disk->disk_name, "ublkb%d", ub->ub_number); - + ublk_align_max_io_size(ub); mutex_init(&ub->mutex); /* add char dev so that ublksrv daemon can be setup */ - err = ublk_add_chdev(ub); - if (err) - return err; + return ublk_add_chdev(ub); - /* don't expose disk now until we got start command from cdev */ - - return 0; - -out_free_request_queue: - blk_mq_destroy_queue(ub->ub_queue); -out_cleanup_tags: - blk_mq_free_tag_set(&ub->tag_set); out_deinit_queues: ublk_deinit_queues(ub); out_destroy_dev: @@ -1209,6 +1163,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) int ublksrv_pid = (int)header->data[0]; unsigned long dev_blocks = header->data[1]; struct ublk_device *ub; + struct gendisk *disk; int ret = -EINVAL; if (ublksrv_pid <= 0) @@ -1223,21 +1178,45 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); mutex_lock(&ub->mutex); - if (disk_live(ub->ub_disk)) { + if (ub->dev_info.state == UBLK_S_DEV_LIVE || + test_bit(UB_STATE_USED, &ub->state)) { ret = -EEXIST; goto out_unlock; } /* We may get disk size updated */ - if (dev_blocks) { + if (dev_blocks) ub->dev_info.dev_blocks = dev_blocks; - ublk_update_capacity(ub); - } - ub->dev_info.ublksrv_pid = ublksrv_pid; - ret = add_disk(ub->ub_disk); - if (ret) - goto out_unlock; + disk = blk_mq_alloc_disk(&ub->tag_set, ub); + if (IS_ERR(disk)) { + ret = PTR_ERR(disk); + goto out_unlock; + } + sprintf(disk->disk_name, "ublkb%d", ub->ub_number); + disk->fops = &ub_fops; + disk->private_data = ub; + + blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size); + blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size); + blk_queue_io_min(disk->queue, ub->dev_info.block_size); + blk_queue_max_hw_sectors(disk->queue, + ub->dev_info.rq_max_blocks << (ub->bs_shift - 9)); + disk->queue->limits.discard_granularity = PAGE_SIZE; + blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9); + blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9); + + set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9)); + + ub->dev_info.ublksrv_pid = ublksrv_pid; + ub->ub_disk = disk; + get_device(&ub->cdev_dev); + ret = add_disk(disk); + if (ret) { + put_disk(disk); + goto out_unlock; + } + set_bit(UB_STATE_USED, &ub->state); ub->dev_info.state = UBLK_S_DEV_LIVE; out_unlock: mutex_unlock(&ub->mutex); From 0a3e5cc7bbfcd571a2e53779ef7d7aa3c57d5432 Mon Sep 17 00:00:00 2001 
From: Christoph Hellwig Date: Wed, 20 Jul 2022 15:05:40 +0200 Subject: [PATCH 172/178] blk-mq: fix error handling in __blk_mq_alloc_disk To fully clean up the queue if the disk allocation fails we need to call blk_mq_destroy_queue and not just blk_put_queue. Fixes: 6f8191fdf41d ("block: simplify disk shutdown") Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220720130541.1323531-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index d716b7f3763f..70177ee74295 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3960,7 +3960,7 @@ struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, disk = __alloc_disk_node(q, set->numa_node, lkclass); if (!disk) { - blk_put_queue(q); + blk_mq_destroy_queue(q); return ERR_PTR(-ENOMEM); } set_bit(GD_OWNS_QUEUE, &disk->state); From c5db2cfc6274692d821d33b59acb6ff615e350c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2022 15:05:41 +0200 Subject: [PATCH 173/178] block: call blk_mq_exit_queue from disk_release for never added disks To undo all the initialization from blk_mq_init_allocated_queue in case of a probe failure where add_disk is never called we have to call blk_mq_exit_queue from put_disk. This relies on the fact that drivers always call blk_mq_free_tag_set after calling put_disk in the probe error path if they have a gendisk at all. We should be doing this in general, but can't do it for the normal teardown case (yet) as the tagset can be gone by the time the disk is released once it was added. I hope to sort this out properly eventually but for now this isolated hack will do it. Fixes: 6f8191fdf41d ("block: simplify disk shutdown") Signed-off-by: Christoph Hellwig Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20220720130541.1323531-2-hch@lst.de Signed-off-by: Jens Axboe --- block/genhd.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/block/genhd.c b/block/genhd.c index 44dfcf67ed96..e1d5b10ac193 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1138,6 +1138,18 @@ static void disk_release(struct device *dev) might_sleep(); WARN_ON_ONCE(disk_live(disk)); + /* + * To undo all the initialization from blk_mq_init_allocated_queue in + * case of a probe failure where add_disk is never called we have to + * call blk_mq_exit_queue here. We can't do this for the more common + * teardown case (yet) as the tagset can be gone by the time the disk + * is released once it was added. + */ + if (queue_is_mq(disk->queue) && + test_bit(GD_OWNS_QUEUE, &disk->state) && + !test_bit(GD_ADDED, &disk->state)) + blk_mq_exit_queue(disk->queue); + blkcg_exit_queue(disk->queue); disk_release_events(disk); @@ -1403,6 +1415,9 @@ EXPORT_SYMBOL(__blk_alloc_disk); * This decrements the refcount for the struct gendisk. When this reaches 0 * we'll have disk_release() called. * + * Note: for blk-mq disk put_disk must be called before freeing the tag_set + * when handling probe errors (that is before add_disk() is called). + * * Context: Any context, but the last reference must not be dropped from * atomic context. */ From 828b5f017d9d5d0491cd2e71d48f4f2139078e2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jul 2022 08:34:32 +0200 Subject: [PATCH 174/178] block: remove __blk_get_queue __blk_get_queue is only called by blk_get_queue, so merge the two.
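The merged helper is a standard conditional-refcount idiom. A userspace model (queue, refs and dying are stand-ins for request_queue, q->kobj and blk_queue_dying()):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct queue {
        atomic_int refs;
        atomic_bool dying;
    };

    bool queue_get(struct queue *q)
    {
        if (atomic_load(&q->dying))
            return false;               /* blk_queue_dying(q) */
        atomic_fetch_add(&q->refs, 1);  /* kobject_get(&q->kobj) */
        return true;
    }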
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220721063432.1714609-1-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-core.c | 10 ++++------ block/blk.h | 5 ----- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 123468b9d2e4..3d286a256d3d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -461,12 +461,10 @@ fail_q: */ bool blk_get_queue(struct request_queue *q) { - if (likely(!blk_queue_dying(q))) { - __blk_get_queue(q); - return true; - } - - return false; + if (unlikely(blk_queue_dying(q))) + return false; + kobject_get(&q->kobj); + return true; } EXPORT_SYMBOL(blk_get_queue); diff --git a/block/blk.h b/block/blk.h index c4b084bfe87c..1d83b1d41cd1 100644 --- a/block/blk.h +++ b/block/blk.h @@ -31,11 +31,6 @@ extern struct kmem_cache *blk_requestq_srcu_cachep; extern struct kobj_type blk_queue_ktype; extern struct ida blk_queue_ida; -static inline void __blk_get_queue(struct request_queue *q) -{ - kobject_get(&q->kobj); -} - bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, From e94eb459d3e4604927ab4e08f81649fcea418318 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 21 Jul 2022 23:31:17 +0800 Subject: [PATCH 175/178] ublk_drv: fix lockdep warning ub->mutex is used to protect reading and writing ub->mm, so the following lockdep warning is triggered. Fix it by using one dedicated spin lock for protecting ub->mm. [1] lockdep warning [ 25.046186] ====================================================== [ 25.048886] WARNING: possible circular locking dependency detected [ 25.051610] 5.19.0-rc4_for-v5.20+ #149 Not tainted [ 25.053665] ------------------------------------------------------ [ 25.056334] ublk/989 is trying to acquire lock: [ 25.058296] ffff975d0329a918 (&disk->open_mutex){+.+.}-{3:3}, at: bd_register_pending_holders+0x2a/0x110 [ 25.063678] [ 25.063678] but task is already holding lock: [ 25.066246] ffff975d1df59708 (&ub->mutex){+.+.}-{3:3}, at: ublk_ctrl_uring_cmd+0x2df/0x730 [ 25.069423] [ 25.069423] which lock already depends on the new lock.
[ 25.069423] [ 25.072603] [ 25.072603] the existing dependency chain (in reverse order) is: [ 25.074908] [ 25.074908] -> #3 (&ub->mutex){+.+.}-{3:3}: [ 25.076386] __mutex_lock+0x93/0x870 [ 25.077470] ublk_ch_mmap+0x3a/0x140 [ 25.078494] mmap_region+0x375/0x5a0 [ 25.079386] do_mmap+0x33a/0x530 [ 25.080168] vm_mmap_pgoff+0xb9/0x150 [ 25.080979] ksys_mmap_pgoff+0x184/0x1f0 [ 25.081838] do_syscall_64+0x37/0x80 [ 25.082653] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 25.083730] [ 25.083730] -> #2 (&mm->mmap_lock#2){++++}-{3:3}: [ 25.084707] __might_fault+0x55/0x80 [ 25.085344] _copy_from_user+0x1e/0xa0 [ 25.086020] get_sg_io_hdr+0x26/0xb0 [ 25.086651] scsi_ioctl+0x42f/0x960 [ 25.087267] sr_block_ioctl+0xe8/0x100 [ 25.087734] blkdev_ioctl+0x134/0x2b0 [ 25.088196] __x64_sys_ioctl+0x8a/0xc0 [ 25.088677] do_syscall_64+0x37/0x80 [ 25.089044] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 25.089548] [ 25.089548] -> #1 (&cd->lock){+.+.}-{3:3}: [ 25.090072] __mutex_lock+0x93/0x870 [ 25.090452] sr_block_open+0x64/0xe0 [ 25.090837] blkdev_get_whole+0x26/0x90 [ 25.091445] blkdev_get_by_dev.part.0+0x1ce/0x2f0 [ 25.092203] blkdev_open+0x52/0x90 [ 25.092617] do_dentry_open+0x1ca/0x360 [ 25.093499] path_openat+0x78d/0xcb0 [ 25.094136] do_filp_open+0xa1/0x130 [ 25.094759] do_sys_openat2+0x76/0x130 [ 25.095454] __x64_sys_openat+0x5c/0x70 [ 25.096078] do_syscall_64+0x37/0x80 [ 25.096637] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 25.097304] [ 25.097304] -> #0 (&disk->open_mutex){+.+.}-{3:3}: [ 25.098229] __lock_acquire+0x12e2/0x1f90 [ 25.098789] lock_acquire+0xbf/0x2c0 [ 25.099256] __mutex_lock+0x93/0x870 [ 25.099706] bd_register_pending_holders+0x2a/0x110 [ 25.100246] device_add_disk+0x209/0x370 [ 25.100712] ublk_ctrl_uring_cmd+0x405/0x730 [ 25.101205] io_issue_sqe+0xfe/0x2ac0 [ 25.101665] io_submit_sqes+0x352/0x1820 [ 25.102131] __do_sys_io_uring_enter+0x848/0xdc0 [ 25.102646] do_syscall_64+0x37/0x80 [ 25.103087] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 25.103640] [ 25.103640] other info that might help us debug this: [ 25.103640] [ 25.104549] Chain exists of: [ 25.104549] &disk->open_mutex --> &mm->mmap_lock#2 --> &ub->mutex [ 25.104549] [ 25.105611] Possible unsafe locking scenario: [ 25.105611] [ 25.106258] CPU0 CPU1 [ 25.106677] ---- ---- [ 25.107100] lock(&ub->mutex); [ 25.107446] lock(&mm->mmap_lock#2); [ 25.108045] lock(&ub->mutex); [ 25.108802] lock(&disk->open_mutex); [ 25.109265] [ 25.109265] *** DEADLOCK *** [ 25.109265] [ 25.110117] 2 locks held by ublk/989: [ 25.110490] #0: ffff975d07bbf8a8 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0x83e/0xdc0 [ 25.111249] #1: ffff975d1df59708 (&ub->mutex){+.+.}-{3:3}, at: ublk_ctrl_uring_cmd+0x2df/0x730 [ 25.111943] [ 25.111943] stack backtrace: [ 25.112557] CPU: 2 PID: 989 Comm: ublk Not tainted 5.19.0-rc4_for-v5.20+ #149 [ 25.113137] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-1.fc33 04/01/2014 [ 25.113792] Call Trace: [ 25.114130] [ 25.114417] dump_stack_lvl+0x71/0xa0 [ 25.114771] check_noncircular+0xdf/0x100 [ 25.115137] ? register_lock_class+0x38/0x470 [ 25.115524] __lock_acquire+0x12e2/0x1f90 [ 25.115887] ? find_held_lock+0x2b/0x80 [ 25.116244] lock_acquire+0xbf/0x2c0 [ 25.116590] ? bd_register_pending_holders+0x2a/0x110 [ 25.117009] __mutex_lock+0x93/0x870 [ 25.117362] ? bd_register_pending_holders+0x2a/0x110 [ 25.117780] ? bd_register_pending_holders+0x2a/0x110 [ 25.118201] ? kobject_add+0x71/0x90 [ 25.118546] ? 
bd_register_pending_holders+0x2a/0x110 [ 25.118958] bd_register_pending_holders+0x2a/0x110 [ 25.119373] device_add_disk+0x209/0x370 [ 25.119732] ublk_ctrl_uring_cmd+0x405/0x730 [ 25.120109] ? rcu_read_lock_sched_held+0x3c/0x70 [ 25.120514] io_issue_sqe+0xfe/0x2ac0 [ 25.120863] io_submit_sqes+0x352/0x1820 [ 25.121228] ? rcu_read_lock_sched_held+0x3c/0x70 [ 25.121626] ? __do_sys_io_uring_enter+0x83e/0xdc0 [ 25.122028] ? find_held_lock+0x2b/0x80 [ 25.122390] ? __do_sys_io_uring_enter+0x848/0xdc0 [ 25.122791] __do_sys_io_uring_enter+0x848/0xdc0 [ 25.123190] ? syscall_enter_from_user_mode+0x20/0x70 [ 25.123606] ? syscall_enter_from_user_mode+0x20/0x70 [ 25.124024] do_syscall_64+0x37/0x80 [ 25.124383] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 25.124829] RIP: 0033:0x7f120a762af6 [ 25.125223] Code: 45 c1 41 89 c2 41 b9 08 00 00 00 41 83 ca 10 f6 87 d0 00 00 00 01 8b bf cc 00 00 00 44 0f 44 d0 45 31 c0c [ 25.126576] RSP: 002b:00007ffdcb3c5518 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa [ 25.127153] RAX: ffffffffffffffda RBX: 00000000013aef50 RCX: 00007f120a762af6 [ 25.127748] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000004 [ 25.128351] RBP: 000000000000000b R08: 0000000000000000 R09: 0000000000000008 [ 25.128956] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcb3c74a6 [ 25.129524] R13: 00000000013aef50 R14: 0000000000000000 R15: 00000000000003df [ 25.130121] Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220721153117.591394-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 81bfdda0f1af..f058f40b639c 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -131,6 +131,7 @@ struct ublk_device { struct mutex mutex; + spinlock_t mm_lock; struct mm_struct *mm; struct completion completion; @@ -678,12 +679,12 @@ static int ublk_ch_mmap(struct file *filp, struct vm_area_struct *vma) unsigned long pfn, end, phys_off = vma->vm_pgoff << PAGE_SHIFT; int q_id, ret = 0; - mutex_lock(&ub->mutex); + spin_lock(&ub->mm_lock); if (!ub->mm) ub->mm = current->mm; if (current->mm != ub->mm) ret = -EINVAL; - mutex_unlock(&ub->mutex); + spin_unlock(&ub->mm_lock); if (ret) return ret; @@ -1122,6 +1123,7 @@ static int ublk_add_dev(struct ublk_device *ub) ublk_align_max_io_size(ub); mutex_init(&ub->mutex); + spin_lock_init(&ub->mm_lock); /* add char dev so that ublksrv daemon can be setup */ return ublk_add_chdev(ub); From fa9482e0b23d9abe7034becff59daeaba09146ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 22 Jul 2022 18:38:16 +0800 Subject: [PATCH 176/178] ublk_drv: fix error handling of ublk_add_dev __ublk_destroy_dev() is called for handling error in ublk_add_dev(), but either tagset isn't allocated or mutex isn't initialized. So fix the issue by replacing ublk_add_dev with a ublk_add_tag_set function that is much more limited in scope and instead unwind every single step directly in ublk_ctrl_add_dev. To allow for this, refactor the device freeing so that there is a helper for freeing the device number instead of coupling that with freeing the mutex and the memory. Note that this now copies the dev_info to userspace before adding the character device. This not only simplifies the error handling in ublk_ctrl_add_dev, but also means that the character device can only be seen by userspace if the device addition succeeded. Based on a patch from Ming Lei.
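The structure the refactor arrives at is the classic goto-unwind ladder. Reduced to userspace stubs (the step_*/undo_* names are hypothetical), the invariant is that each label undoes everything acquired before the failing step, in reverse order:

    int step_number(void); void undo_number(void);
    int step_queues(void); void undo_queues(void);
    int step_tags(void);

    int add_dev(void)
    {
        int ret = step_number();    /* e.g. reserve a device number */

        if (ret)
            return ret;
        ret = step_queues();
        if (ret)
            goto out_number;
        ret = step_tags();
        if (ret)
            goto out_queues;
        return 0;

    out_queues:
        undo_queues();
    out_number:
        undo_number();
        return ret;
    }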
Signed-off-by: Christoph Hellwig Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220722103817.631258-2-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 102 +++++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f058f40b639c..67f91a80a7ab 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1005,7 +1005,7 @@ static int ublk_init_queues(struct ublk_device *ub) return ret; } -static int __ublk_alloc_dev_number(struct ublk_device *ub, int idx) +static int ublk_alloc_dev_number(struct ublk_device *ub, int idx) { int i = idx; int err; @@ -1027,16 +1027,12 @@ static int __ublk_alloc_dev_number(struct ublk_device *ub, int idx) return err; } -static void __ublk_destroy_dev(struct ublk_device *ub) +static void ublk_free_dev_number(struct ublk_device *ub) { spin_lock(&ublk_idr_lock); idr_remove(&ublk_index_idr, ub->ub_number); wake_up_all(&ublk_idr_wq); spin_unlock(&ublk_idr_lock); - - mutex_destroy(&ub->mutex); - - kfree(ub); } static void ublk_cdev_rel(struct device *dev) @@ -1045,8 +1041,9 @@ static void ublk_cdev_rel(struct device *dev) blk_mq_free_tag_set(&ub->tag_set); ublk_deinit_queues(ub); - - __ublk_destroy_dev(ub); + ublk_free_dev_number(ub); + mutex_destroy(&ub->mutex); + kfree(ub); } static int ublk_add_chdev(struct ublk_device *ub) @@ -1092,24 +1089,8 @@ static void ublk_align_max_io_size(struct ublk_device *ub) round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift; } -/* add tag_set & cdev, cleanup everything in case of failure */ -static int ublk_add_dev(struct ublk_device *ub) +static int ublk_add_tag_set(struct ublk_device *ub) { - int err = -ENOMEM; - - /* We are not ready to support zero copy */ - ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; - - ub->bs_shift = ilog2(ub->dev_info.block_size); - ub->dev_info.nr_hw_queues = min_t(unsigned int, - ub->dev_info.nr_hw_queues, nr_cpu_ids); - - INIT_WORK(&ub->stop_work, ublk_stop_work_fn); - INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work); - - if (ublk_init_queues(ub)) - goto out_destroy_dev; - ub->tag_set.ops = &ublk_mq_ops; ub->tag_set.nr_hw_queues = ub->dev_info.nr_hw_queues; ub->tag_set.queue_depth = ub->dev_info.queue_depth; @@ -1117,22 +1098,7 @@ static int ublk_add_dev(struct ublk_device *ub) ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ub->tag_set.driver_data = ub; - err = blk_mq_alloc_tag_set(&ub->tag_set); - if (err) - goto out_deinit_queues; - - ublk_align_max_io_size(ub); - mutex_init(&ub->mutex); - spin_lock_init(&ub->mm_lock); - - /* add char dev so that ublksrv daemon can be setup */ - return ublk_add_chdev(ub); - -out_deinit_queues: - ublk_deinit_queues(ub); -out_destroy_dev: - __ublk_destroy_dev(ub); - return err; + return blk_mq_alloc_tag_set(&ub->tag_set); } static void ublk_remove(struct ublk_device *ub) @@ -1318,26 +1284,56 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) ub = kzalloc(sizeof(*ub), GFP_KERNEL); if (!ub) goto out_unlock; + mutex_init(&ub->mutex); + spin_lock_init(&ub->mm_lock); + INIT_WORK(&ub->stop_work, ublk_stop_work_fn); + INIT_DELAYED_WORK(&ub->monitor_work, ublk_daemon_monitor_work); - ret = __ublk_alloc_dev_number(ub, header->dev_id); - if (ret < 0) { - kfree(ub); - goto out_unlock; - } + ret = ublk_alloc_dev_number(ub, header->dev_id); + if (ret < 0) + goto out_free_ub; memcpy(&ub->dev_info, &info, sizeof(info)); /* update device id */ ub->dev_info.dev_id = 
ub->ub_number; - ret = ublk_add_dev(ub); - if (ret) - goto out_unlock; + /* We are not ready to support zero copy */ + ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; - if (copy_to_user(argp, &ub->dev_info, sizeof(info))) { - ublk_remove(ub); - ret = -EFAULT; - } + ub->bs_shift = ilog2(ub->dev_info.block_size); + ub->dev_info.nr_hw_queues = min_t(unsigned int, + ub->dev_info.nr_hw_queues, nr_cpu_ids); + ublk_align_max_io_size(ub); + + ret = ublk_init_queues(ub); + if (ret) + goto out_free_dev_number; + + ret = ublk_add_tag_set(ub); + if (ret) + goto out_deinit_queues; + + ret = -EFAULT; + if (copy_to_user(argp, &ub->dev_info, sizeof(info))) + goto out_free_tag_set; + + /* + * Add the char dev so that ublksrv daemon can be setup. + * ublk_add_chdev() will cleanup everything if it fails. + */ + ret = ublk_add_chdev(ub); + goto out_unlock; + +out_free_tag_set: + blk_mq_free_tag_set(&ub->tag_set); +out_deinit_queues: + ublk_deinit_queues(ub); +out_free_dev_number: + ublk_free_dev_number(ub); +out_free_ub: + mutex_destroy(&ub->mutex); + kfree(ub); out_unlock: mutex_unlock(&ublk_ctl_mutex); return ret; From 6d8c5afc9ab14595707ff25d971dde45728eba3e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Jul 2022 18:38:17 +0800 Subject: [PATCH 177/178] ublk_drv: make sure that correct flags(features) returned to userspace Userspace may support more features or newly added flags, but the driver side can be old, so make sure the correct flags (features) are returned to userspace, so that userspace can work as expected. Also mark the 2nd flags field as reserved and just use the 1st one. When we run out of flags, the reserved one can be handled at that time. Reviewed-by: Christoph Hellwig Reviewed-by: ZiyangZhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20220722103817.631258-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 17 ++++++++++++++--- include/uapi/linux/ublk_cmd.h | 7 ++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 67f91a80a7ab..255b2de46a24 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -46,6 +46,9 @@ #define UBLK_MINORS (1U << MINORBITS) +/* All UBLK_F_* have to be included into UBLK_F_ALL */ +#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK) struct ublk_rq_data { struct callback_head work; }; @@ -953,7 +956,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id) void *ptr; int size; - ubq->flags = ub->dev_info.flags[0]; + ubq->flags = ub->dev_info.flags; ubq->q_id = q_id; ubq->q_depth = ub->dev_info.queue_depth; size = ublk_queue_cmd_buf_size(ub, q_id); @@ -1246,7 +1249,7 @@ out_put_device: static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info) { pr_devel("%s: dev id %d flags %llx\n", __func__, - info->dev_id, info->flags[0]); + info->dev_id, info->flags); pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", info->nr_hw_queues, info->queue_depth, info->block_size, info->dev_blocks); @@ -1298,8 +1301,16 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) /* update device id */ ub->dev_info.dev_id = ub->ub_number; + /* + * 64bit flags will be copied back to userspace as feature + * negotiation result, so have to clear flags which driver + * doesn't support yet, then userspace can get correct flags + * (features) to handle.
+ */ + ub->dev_info.flags &= UBLK_F_ALL; + /* We are not ready to support zero copy */ - ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; + ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; ub->bs_shift = ilog2(ub->dev_info.block_size); ub->dev_info.nr_hw_queues = min_t(unsigned int, diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 917580b34198..ca33092354ab 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -46,13 +46,13 @@ * zero copy requires 4k block size, and can remap ublk driver's io * request into ublksrv's vm space */ -#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0) +#define UBLK_F_SUPPORT_ZERO_COPY (1ULL << 0) /* * Force to complete io cmd via io_uring_cmd_complete_in_task so that * performance comparison is done easily with using task_work_add */ -#define UBLK_F_URING_CMD_COMP_IN_TASK (1UL << 1) +#define UBLK_F_URING_CMD_COMP_IN_TASK (1ULL << 1) /* device state */ #define UBLK_S_DEV_DEAD 0 @@ -88,7 +88,8 @@ struct ublksrv_ctrl_dev_info { __s32 ublksrv_pid; __s32 reserved0; - __u64 flags[2]; + __u64 flags; + __u64 flags_reserved; /* For ublksrv internal use, invisible to ublk driver */ __u64 ublksrv_flags; From 8d9fdb6011b4d413271eba3a62e10f89efecc419 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Jul 2022 17:12:23 +0300 Subject: [PATCH 178/178] ublk_drv: fix double shift bug The test/clear_bit() functions take a bit number, but this code is passing a shifted value. It's the equivalent of saying BIT(BIT(0)) instead of just BIT(0). This doesn't affect runtime because numbers are small and it's done consistently. Fixes: fa362045564e ("ublk: simplify ublk_ch_open and ublk_ch_release") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/Yt/2R/+MJf/MSoyl@kili Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 255b2de46a24..3f1906965ac8 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -127,8 +127,8 @@ struct ublk_device { struct cdev cdev; struct device cdev_dev; -#define UB_STATE_OPEN (1 << 0) -#define UB_STATE_USED (1 << 1) +#define UB_STATE_OPEN 0 +#define UB_STATE_USED 1 unsigned long state; int ub_number;
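The bug class is easy to demonstrate in isolation: helpers that take a bit number silently land on the wrong bit when handed an already-shifted mask, and it only worked here because both call sites made the same mistake. A runnable toy:

    #include <stdio.h>

    #define BIT(n) (1UL << (n))

    int main(void)
    {
        unsigned long state = 0;

        /* wrong: the "bit number" 1 << 1 == 2 really selects bit 2 */
        state |= BIT(1 << 1);
        printf("shifted arg sets mask %#lx\n", state);  /* 0x4 */

        /* right: pass the plain bit number */
        state = 0;
        state |= BIT(1);
        printf("bit number sets mask %#lx\n", state);   /* 0x2 */
        return 0;
    }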