for-6.10/block-20240511
-----BEGIN PGP SIGNATURE-----

iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmY/YgsQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpvi0EACwnFRtYioizBH0x7QUHTBcIr0IhACd5gfz
bm+uwlDUtf6G6lupHdJT9gOVB2z2z1m2Pz//8RuUVWw3Eqw2+rfgG8iJd+yo7IaV
DpX3WaM4NnBvB7FKOKHlMPvGuf7KgbZ3uPm3x8cbrn/axMmkZ6ljxTixJ3p5t4+s
xRsef/lVdG71DkXIFgTKATB86yNRJNlRQTbL+sZW22vdXdtfyBbOgR1sBuFfp7Hd
g/uocZM/z0ahM6JH/5R2IX2ttKXMIBZLA8HRkJdvYqg022cj4js2YyRCPU3N6jQN
MtN4TpJV5I++8l6SPQOOhaDNrK/6zFtDQpwG0YBiKKj3nQDgVbWWb8ejYTIUv4MP
SrEto4MVBEqg5N65VwYYhIf45rmueFyJp6z0Vqv6Owur5nuww/YIFknmoMa/WDMd
V8dIU3zL72FZDbPjIBjxHeqAGz9OgzEVafled7pi0Xbw6wqiB4kZihlMGXlD+WBy
Yd6xo8PX4i5+d2LLKKPxpW1X0eJlKYJ/4dnYCoFN8LmXSiPJnMx2pYrV+NqMxy4X
Thr8lxswLQC7j9YBBuIeDl8NB9N5FZZLvaC6I25QKq045M2ckJ+VrounsQb3vGwJ
72nlxxBZL8wz3sasgX9Pc1Cez9AqYbM+UZahq8ezPY5y3Jh0QfRw/MOk1ZaDNC8V
CNOHBH0E+Q==
=HnjE
-----END PGP SIGNATURE-----

Merge tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - Add a partscan attribute in sysfs, fixing an issue with systemd
   relying on an internal interface that went away

 - Attempt #2 at making long-running discards interruptible. The
   previous attempt went into 6.9, but we ended up mostly reverting it
   as it had issues

 - Remove old ida_simple API in bcache

 - Support for zoned write plugging, greatly improving the performance
   on zoned devices

 - Remove the old throttle low interface, which has been experimental
   since 2017, never made it beyond that, and isn't being used

 - Remove page->index debugging checks in brd, as they haven't caught
   anything and prepare us for removing page->index from struct page

 - MD pull request from Song

 - Don't schedule block workers on isolated CPUs

* tag 'for-6.10/block-20240511' of git://git.kernel.dk/linux: (84 commits)
  blk-throttle: delay initialization until configuration
  blk-throttle: remove CONFIG_BLK_DEV_THROTTLING_LOW
  block: fix that util can be greater than 100%
  block: support to account io_ticks precisely
  block: add plug while submitting IO
  bcache: fix variable length array abuse in btree_iter
  bcache: Remove usage of the deprecated ida_simple_xx() API
  md: Revert "md: Fix overflow in is_mddev_idle"
  blk-lib: check for kill signal in ioctl BLKDISCARD
  block: add a bio_await_chain helper
  block: add a blk_alloc_discard_bio helper
  block: add a bio_chain_and_submit helper
  block: move discard checks into the ioctl handler
  block: remove the discard_granularity check in __blkdev_issue_discard
  block/ioctl: prefer different overflow check
  null_blk: Fix the WARNING: modpost: missing MODULE_DESCRIPTION()
  block: fix and simplify blkdevparts= cmdline parsing
  block: refine the EOF check in blkdev_iomap_begin
  block: add a partscan sysfs attribute for disks
  block: add a disk_has_partscan helper
  ...
commit 0c9f4ac808
Documentation/ABI/stable/sysfs-block

@@ -101,6 +101,16 @@ Description:
		devices that support receiving integrity metadata.


What:		/sys/block/<disk>/partscan
Date:		May 2024
Contact:	Christoph Hellwig <hch@lst.de>
Description:
		The /sys/block/<disk>/partscan file reports if partition
		scanning is enabled for the disk. It returns "1" if partition
		scanning is enabled, or "0" if not. The value type is a 32-bit
		unsigned integer, but only "0" and "1" are valid values.


What:		/sys/block/<disk>/<partition>/alignment_offset
Date:		April 2009
Contact:	Martin K. Petersen <martin.petersen@oracle.com>

@@ -584,18 +594,6 @@ Description:
		the data. If no such restriction exists, this file will contain
		'0'. This file is writable for testing purposes.


What:		/sys/block/<disk>/queue/throttle_sample_time
Date:		March 2017
Contact:	linux-block@vger.kernel.org
Description:
		[RW] This is the time window that blk-throttle samples data, in
		millisecond. blk-throttle makes decision based on the
		samplings. Lower time means cgroups have more smooth throughput,
		but higher CPU overhead. This exists only when
		CONFIG_BLK_DEV_THROTTLING_LOW is enabled.


What:		/sys/block/<disk>/queue/virt_boundary_mask
Date:		April 2021
Contact:	linux-block@vger.kernel.org
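The partscan attribute documented above is a plain-text sysfs file, so userspace can read it like any other attribute. As a rough illustration only (this helper is not part of the series, and the disk name "sda" is just an example):

/* Minimal sketch: read /sys/block/<disk>/partscan from userspace.
 * The attribute reports "1" or "0"; absence means an older kernel. */
#include <stdio.h>

static int disk_has_partscan(const char *disk)
{
	char path[256];
	FILE *f;
	int val = -1;

	snprintf(path, sizeof(path), "/sys/block/%s/partscan", disk);
	f = fopen(path, "r");
	if (!f)
		return -1;	/* attribute missing: kernel without this series */
	if (fscanf(f, "%d", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	printf("sda partscan: %d\n", disk_has_partscan("sda"));
	return 0;
}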
@@ -76,7 +76,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_BLK_DEV_ZONED=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_DEV_THROTTLING_LOW=y
CONFIG_BLK_WBT=y
CONFIG_BLK_CGROUP_IOLATENCY=y
CONFIG_BLK_CGROUP_FC_APPID=y
block/Kconfig

@@ -100,7 +100,6 @@ config BLK_DEV_WRITE_MOUNTED

config BLK_DEV_ZONED
	bool "Zoned block device support"
	select MQ_IOSCHED_DEADLINE
	help
	  Block layer zoned block device support. This option enables
	  support for ZAC/ZBC/ZNS host-managed and host-aware zoned block

@@ -120,17 +119,6 @@ config BLK_DEV_THROTTLING

	  See Documentation/admin-guide/cgroup-v1/blkio-controller.rst for more information.

config BLK_DEV_THROTTLING_LOW
	bool "Block throttling .low limit interface support (EXPERIMENTAL)"
	depends on BLK_DEV_THROTTLING
	help
	  Add .low limit interface for block throttling. The low limit is a best
	  effort limit to prioritize cgroups. Depending on the setting, the limit
	  can be used to protect cgroups in terms of bandwidth/iops and better
	  utilize disk resource.

	  Note, this is an experimental interface and could be changed someday.

config BLK_WBT
	bool "Enable support for block device writeback throttling"
	help

@@ -198,10 +186,6 @@ config BLK_DEBUG_FS
	  Unless you are building a kernel for a tiny system, you should
	  say Y here.

config BLK_DEBUG_FS_ZONED
	bool
	default BLK_DEBUG_FS && BLK_DEV_ZONED

config BLK_SED_OPAL
	bool "Logic for interfacing with Opal enabled SEDs"
	depends on KEYS
block/Makefile

@@ -33,7 +33,6 @@ obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
obj-$(CONFIG_BLK_PM)		+= blk-pm.o
obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= blk-crypto.o blk-crypto-profile.o \
block/bio.c

@@ -345,17 +345,28 @@ void bio_chain(struct bio *bio, struct bio *parent)
}
EXPORT_SYMBOL(bio_chain);

/**
 * bio_chain_and_submit - submit a bio after chaining it to another one
 * @prev: bio to chain and submit
 * @new: bio to chain to
 *
 * If @prev is non-NULL, chain it to @new and submit it.
 *
 * Return: @new.
 */
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new)
{
	if (prev) {
		bio_chain(prev, new);
		submit_bio(prev);
	}
	return new;
}

struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
		unsigned int nr_pages, blk_opf_t opf, gfp_t gfp)
{
	struct bio *new = bio_alloc(bdev, nr_pages, opf, gfp);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
	return bio_chain_and_submit(bio, bio_alloc(bdev, nr_pages, opf, gfp));
}
EXPORT_SYMBOL_GPL(blk_next_bio);

@@ -1384,6 +1395,26 @@ int submit_bio_wait(struct bio *bio)
}
EXPORT_SYMBOL(submit_bio_wait);

static void bio_wait_end_io(struct bio *bio)
{
	complete(bio->bi_private);
	bio_put(bio);
}

/*
 * bio_await_chain - ends @bio and waits for every chained bio to complete
 */
void bio_await_chain(struct bio *bio)
{
	DECLARE_COMPLETION_ONSTACK_MAP(done,
			bio->bi_bdev->bd_disk->lockdep_map);

	bio->bi_private = &done;
	bio->bi_end_io = bio_wait_end_io;
	bio_endio(bio);
	blk_wait_io(&done);
}

void __bio_advance(struct bio *bio, unsigned bytes)
{
	if (bio_integrity(bio))

@@ -1576,6 +1607,8 @@ again:
	if (!bio_integrity_endio(bio))
		return;

	blk_zone_bio_endio(bio);

	rq_qos_done_bio(bio);

	if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {

@@ -1596,7 +1629,6 @@ again:
		goto again;
	}

	blk_throtl_bio_endio(bio);
	/* release cgroup info */
	bio_uninit(bio);
	if (bio->bi_end_io)
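bio_chain_and_submit() lets a caller build an open-ended chain of bios while holding only the most recently allocated one; the final bio completes last and can be waited on. A rough sketch of the intended calling pattern follows (hypothetical helper, modelled on the __blkdev_issue_discard() loop later in this series, not code from the patches themselves; the per-bio length cap is arbitrary):

/* Hypothetical illustration of the bio_chain_and_submit() pattern:
 * allocate bios one at a time, submit each previous bio chained to the
 * new one, then wait on the final parent bio for the whole chain. */
static int example_issue_chain(struct block_device *bdev, sector_t sector,
			       sector_t nr_sects)
{
	struct bio *prev = NULL, *new;

	while (nr_sects) {
		/* arbitrary example cap on the per-bio range */
		sector_t len = min_t(sector_t, nr_sects, BIO_MAX_VECS);

		new = bio_alloc(bdev, 0, REQ_OP_DISCARD, GFP_KERNEL);
		new->bi_iter.bi_sector = sector;
		new->bi_iter.bi_size = len << SECTOR_SHIFT;
		/* submits @prev (if any) chained to @new, returns @new */
		prev = bio_chain_and_submit(prev, new);
		sector += len;
		nr_sects -= len;
	}
	/* the last bio only completes after every chained bio completes */
	return prev ? submit_bio_wait(prev) : 0;
}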
block/blk-cgroup-rwstat.c

@@ -9,25 +9,19 @@ int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
	ret = percpu_counter_init_many(rwstat->cpu_cnt, 0, gfp, BLKG_RWSTAT_NR);
	if (ret)
		return ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_rwstat_init);

void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
	percpu_counter_destroy_many(rwstat->cpu_cnt, BLKG_RWSTAT_NR);
}
EXPORT_SYMBOL_GPL(blkg_rwstat_exit);
block/blk-cgroup.c

@@ -218,8 +218,7 @@ static void blkg_async_bio_workfn(struct work_struct *work)

	/* as long as there are pending bios, @blkg can't go away */
	spin_lock(&blkg->async_bio_lock);
	bio_list_merge(&bios, &blkg->async_bios);
	bio_list_init(&blkg->async_bios);
	bio_list_merge_init(&bios, &blkg->async_bios);
	spin_unlock(&blkg->async_bio_lock);

	/* start plug only when bio_list contains at least 2 bios */

@@ -1444,14 +1443,8 @@ int blkcg_init_disk(struct gendisk *disk)
	if (ret)
		goto err_destroy_all;

	ret = blk_throtl_init(disk);
	if (ret)
		goto err_ioprio_exit;

	return 0;

err_ioprio_exit:
	blk_ioprio_exit(disk);
err_destroy_all:
	blkg_destroy_all(disk);
	return ret;
block/blk-core.c

@@ -591,8 +591,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
		return BLK_STS_NOTSUPP;

	/* The bio sector must point to the start of a sequential zone */
	if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector) ||
	    !bio_zone_is_seq(bio))
	if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
		return BLK_STS_IOERR;

	/*

@@ -604,7 +603,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
		return BLK_STS_IOERR;

	/* Make sure the BIO is small enough and will not get split */
	if (nr_sectors > q->limits.max_zone_append_sectors)
	if (nr_sectors > queue_max_zone_append_sectors(q))
		return BLK_STS_IOERR;

	bio->bi_opf |= REQ_NOMERGE;

@@ -649,11 +648,13 @@ static void __submit_bio(struct bio *bio)
static void __submit_bio_noacct(struct bio *bio)
{
	struct bio_list bio_list_on_stack[2];
	struct blk_plug plug;

	BUG_ON(bio->bi_next);

	bio_list_init(&bio_list_on_stack[0]);
	current->bio_list = bio_list_on_stack;
	blk_start_plug(&plug);

	do {
		struct request_queue *q = bdev_get_queue(bio->bi_bdev);

@@ -687,19 +688,23 @@ static void __submit_bio_noacct(struct bio *bio)
		bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));

	blk_finish_plug(&plug);
	current->bio_list = NULL;
}

static void __submit_bio_noacct_mq(struct bio *bio)
{
	struct bio_list bio_list[2] = { };
	struct blk_plug plug;

	current->bio_list = bio_list;
	blk_start_plug(&plug);

	do {
		__submit_bio(bio);
	} while ((bio = bio_list_pop(&bio_list[0])));

	blk_finish_plug(&plug);
	current->bio_list = NULL;
}

@@ -910,12 +915,6 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
		return 0;

	/*
	 * As the requests that require a zone lock are not plugged in the
	 * first place, directly accessing the plug instead of using
	 * blk_mq_plug() should not have any consequences during flushing for
	 * zoned devices.
	 */
	blk_flush_plug(current->plug, false);

	/*

@@ -987,10 +986,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
	unsigned long stamp;
again:
	stamp = READ_ONCE(part->bd_stamp);
	if (unlikely(time_after(now, stamp))) {
		if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
	}
	if (unlikely(time_after(now, stamp)) &&
	    likely(try_cmpxchg(&part->bd_stamp, &stamp, now)) &&
	    (end || part_in_flight(part)))
		__part_stat_add(part, io_ticks, now - stamp);

	if (part->bd_partno) {
		part = bdev_whole(part);
		goto again;
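The update_io_ticks() change above (together with the "fix that util can be greater than 100%" and "account io_ticks precisely" commits) only advances io_ticks while requests are actually in flight. Utilization shown by iostat-style tools is derived from that counter, which is exported as the "time spent doing I/Os (ms)" field of /proc/diskstats. A userspace sketch of the derivation, for illustration only:

/* Sketch: %util as monitoring tools compute it from two samples of the
 * io_ticks counter (milliseconds with at least one I/O in flight). */
struct disk_sample {
	unsigned long long io_ticks_ms;	/* /proc/diskstats "io time" field */
	unsigned long long wall_ms;	/* timestamp of the sample */
};

static double util_percent(const struct disk_sample *a,
			   const struct disk_sample *b)
{
	unsigned long long busy = b->io_ticks_ms - a->io_ticks_ms;
	unsigned long long span = b->wall_ms - a->wall_ms;

	if (!span)
		return 0.0;
	/* the accounting fix above is meant to keep this at or below 100% */
	return 100.0 * (double)busy / (double)span;
}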
block/blk-flush.c

@@ -130,6 +130,8 @@ static void blk_flush_restore_request(struct request *rq)
	 * original @rq->bio. Restore it.
	 */
	rq->bio = rq->biotail;
	if (rq->bio)
		rq->__sector = rq->bio->bi_iter.bi_sector;

	/* make @rq a normal request */
	rq->rq_flags &= ~RQF_FLUSH_SEQ;
block/blk-lib.c

@@ -35,51 +35,39 @@ static sector_t bio_discard_limit(struct block_device *bdev, sector_t sector)
	return round_down(UINT_MAX, discard_granularity) >> SECTOR_SHIFT;
}

struct bio *blk_alloc_discard_bio(struct block_device *bdev,
		sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask)
{
	sector_t bio_sects = min(*nr_sects, bio_discard_limit(bdev, *sector));
	struct bio *bio;

	if (!bio_sects)
		return NULL;

	bio = bio_alloc(bdev, 0, REQ_OP_DISCARD, gfp_mask);
	if (!bio)
		return NULL;
	bio->bi_iter.bi_sector = *sector;
	bio->bi_iter.bi_size = bio_sects << SECTOR_SHIFT;
	*sector += bio_sects;
	*nr_sects -= bio_sects;
	/*
	 * We can loop for a long time in here if someone does full device
	 * discards (like mkfs). Be nice and allow us to schedule out to avoid
	 * softlocking if preempt is disabled.
	 */
	cond_resched();
	return bio;
}

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop)
{
	struct bio *bio = *biop;
	sector_t bs_mask;
	struct bio *bio;

	if (bdev_read_only(bdev))
		return -EPERM;
	if (!bdev_max_discard_sectors(bdev))
		return -EOPNOTSUPP;

	/* In case the discard granularity isn't set by buggy device driver */
	if (WARN_ON_ONCE(!bdev_discard_granularity(bdev))) {
		pr_err_ratelimited("%pg: Error: discard_granularity is 0.\n",
				   bdev);
		return -EOPNOTSUPP;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!nr_sects)
		return -EINVAL;

	while (nr_sects) {
		sector_t req_sects =
			min(nr_sects, bio_discard_limit(bdev, sector));

		bio = blk_next_bio(bio, bdev, 0, REQ_OP_DISCARD, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio->bi_iter.bi_size = req_sects << 9;
		sector += req_sects;
		nr_sects -= req_sects;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
			gfp_mask)))
		*biop = bio_chain_and_submit(*biop, bio);
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
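With the discard path now built on blk_alloc_discard_bio()/bio_chain_and_submit() and the BLKDISCARD ioctl checking for a fatal signal between chunks, a whole-device discard can be aborted instead of leaving the caller stuck. For reference, a minimal userspace invocation of the ioctl looks like the sketch below (illustrative only; the device path is a placeholder and the chosen range is an arbitrary example, so do not run it casually):

/* Sketch: issue BLKDISCARD for a byte range of a block device.
 * BLKDISCARD takes a uint64_t[2] = { start, length } in bytes. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	uint64_t range[2] = { 0, 1ULL << 30 };	/* first 1 GiB, example only */
	int fd = open("/dev/sdX", O_WRONLY);	/* placeholder device name */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* With this series, a long-running discard returns early if the
	 * calling process receives a fatal signal. */
	if (ioctl(fd, BLKDISCARD, &range)) {
		perror("BLKDISCARD");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}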
block/blk-merge.c

@@ -377,6 +377,7 @@ struct bio *__bio_split_to_limits(struct bio *bio,
	blkcg_bio_issue_init(split);
	bio_chain(split, bio);
	trace_block_split(split, bio->bi_iter.bi_sector);
	WARN_ON_ONCE(bio_zone_write_plugging(bio));
	submit_bio_noacct(bio);
	return split;
}

@@ -779,6 +780,8 @@ static void blk_account_io_merge_request(struct request *req)
	if (blk_do_io_stat(req)) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_local_dec(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

@@ -972,13 +975,7 @@ static void blk_account_io_merge_bio(struct request *req)
	part_stat_unlock();
}

enum bio_merge_status {
	BIO_MERGE_OK,
	BIO_MERGE_NONE,
	BIO_MERGE_FAILED,
};

static enum bio_merge_status bio_attempt_back_merge(struct request *req,
enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

@@ -994,6 +991,9 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req,

	blk_update_mixed_merge(req, bio, false);

	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_bio_merged(bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

@@ -1009,6 +1009,14 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
{
	const blk_opf_t ff = bio_failfast(bio);

	/*
	 * A front merge for writes to sequential zones of a zoned block device
	 * can happen only if the user submitted writes out of order. Do not
	 * merge such write to let it fail.
	 */
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		return BIO_MERGE_FAILED;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

@@ -1107,10 +1115,9 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_plug *plug;
	struct blk_plug *plug = current->plug;
	struct request *rq;

	plug = blk_mq_plug(bio);
	if (!plug || rq_list_empty(plug->mq_list))
		return false;

block/blk-mq-debugfs-zoned.c (deleted)

@@ -1,22 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include "blk-mq-debugfs.h"

int queue_zone_wlock_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;
	unsigned int i;

	if (!q->disk->seq_zones_wlock)
		return 0;

	for (i = 0; i < q->disk->nr_zones; i++)
		if (test_bit(i, q->disk->seq_zones_wlock))
			seq_printf(m, "%u\n", i);

	return 0;
}

block/blk-mq-debugfs.c

@@ -160,7 +160,7 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
	{ "pm_only", 0600, queue_pm_only_show, NULL },
	{ "state", 0600, queue_state_show, queue_state_write },
	{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
	{ "zone_wplugs", 0400, queue_zone_wplugs_show, NULL },
	{ },
};

@@ -256,7 +256,6 @@ static const char *const rqf_name[] = {
	RQF_NAME(HASHED),
	RQF_NAME(STATS),
	RQF_NAME(SPECIAL_PAYLOAD),
	RQF_NAME(ZONE_WRITE_LOCKED),
	RQF_NAME(TIMED_OUT),
	RQF_NAME(RESV),
};

block/blk-mq-debugfs.h

@@ -83,10 +83,10 @@ static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
}
#endif

#ifdef CONFIG_BLK_DEBUG_FS_ZONED
int queue_zone_wlock_show(void *data, struct seq_file *m);
#if defined(CONFIG_BLK_DEV_ZONED) && defined(CONFIG_BLK_DEBUG_FS)
int queue_zone_wplugs_show(void *data, struct seq_file *m);
#else
static inline int queue_zone_wlock_show(void *data, struct seq_file *m)
static inline int queue_zone_wplugs_show(void *data, struct seq_file *m)
{
	return 0;
}
block/blk-mq.c

@@ -28,6 +28,7 @@
#include <linux/prefetch.h>
#include <linux/blk-crypto.h>
#include <linux/part_stat.h>
#include <linux/sched/isolation.h>

#include <trace/events/block.h>

@@ -690,6 +691,8 @@ static void blk_mq_finish_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	blk_zone_finish_request(rq);

	if (rq->rq_flags & RQF_USE_SCHED) {
		q->elevator->type->ops.finish_request(rq);
		/*

@@ -761,31 +764,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
}
EXPORT_SYMBOL(blk_dump_rq_flags);

static void req_bio_endio(struct request *rq, struct bio *bio,
			  unsigned int nbytes, blk_status_t error)
{
	if (unlikely(error)) {
		bio->bi_status = error;
	} else if (req_op(rq) == REQ_OP_ZONE_APPEND) {
		/*
		 * Partial zone append completions cannot be supported as the
		 * BIO fragments may end up not being written sequentially.
		 */
		if (bio->bi_iter.bi_size != nbytes)
			bio->bi_status = BLK_STS_IOERR;
		else
			bio->bi_iter.bi_sector = rq->__sector;
	}

	bio_advance(bio, nbytes);

	if (unlikely(rq->rq_flags & RQF_QUIET))
		bio_set_flag(bio, BIO_QUIET);
	/* don't actually finish bio if it's part of flush sequence */
	if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
		bio_endio(bio);
}

static void blk_account_io_completion(struct request *req, unsigned int bytes)
{
	if (req->part && blk_do_io_stat(req)) {

@@ -845,8 +823,7 @@ static void blk_complete_request(struct request *req)
		/* Completion has already been traced */
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);

		if (req_op(req) == REQ_OP_ZONE_APPEND)
			bio->bi_iter.bi_sector = req->__sector;
		blk_zone_update_request_bio(req, bio);

		if (!is_flush)
			bio_endio(bio);

@@ -889,6 +866,8 @@ static void blk_complete_request(struct request *req)
bool blk_update_request(struct request *req, blk_status_t error,
		unsigned int nr_bytes)
{
	bool is_flush = req->rq_flags & RQF_FLUSH_SEQ;
	bool quiet = req->rq_flags & RQF_QUIET;
	int total_bytes;

	trace_block_rq_complete(req, error, nr_bytes);

@@ -909,9 +888,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
	if (blk_crypto_rq_has_keyslot(req) && nr_bytes >= blk_rq_bytes(req))
		__blk_crypto_rq_put_keyslot(req);

	if (unlikely(error && !blk_rq_is_passthrough(req) &&
		     !(req->rq_flags & RQF_QUIET)) &&
	    !test_bit(GD_DEAD, &req->q->disk->state)) {
	if (unlikely(error && !blk_rq_is_passthrough(req) && !quiet) &&
	    !test_bit(GD_DEAD, &req->q->disk->state)) {
		blk_print_req_error(req, error);
		trace_block_rq_error(req, error, nr_bytes);
	}

@@ -923,12 +901,33 @@ bool blk_update_request(struct request *req, blk_status_t error,
		struct bio *bio = req->bio;
		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);

		if (bio_bytes == bio->bi_iter.bi_size)
		if (unlikely(error))
			bio->bi_status = error;

		if (bio_bytes == bio->bi_iter.bi_size) {
			req->bio = bio->bi_next;
		} else if (bio_is_zone_append(bio) && error == BLK_STS_OK) {
			/*
			 * Partial zone append completions cannot be supported
			 * as the BIO fragments may end up not being written
			 * sequentially.
			 */
			bio->bi_status = BLK_STS_IOERR;
		}

		/* Completion has already been traced */
		bio_clear_flag(bio, BIO_TRACE_COMPLETION);
		req_bio_endio(req, bio, bio_bytes, error);
		if (unlikely(quiet))
			bio_set_flag(bio, BIO_QUIET);

		bio_advance(bio, bio_bytes);

		/* Don't actually finish bio if it's part of flush sequence */
		if (!bio->bi_iter.bi_size) {
			blk_zone_update_request_bio(req, bio);
			if (!is_flush)
				bio_endio(bio);
		}

		total_bytes += bio_bytes;
		nr_bytes -= bio_bytes;

@@ -997,6 +996,8 @@ static inline void blk_account_io_done(struct request *req, u64 now)
		update_io_ticks(req->part, jiffies, true);
		part_stat_inc(req->part, ios[sgrp]);
		part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
		part_stat_local_dec(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

@@ -1019,6 +1020,8 @@ static inline void blk_account_io_start(struct request *req)

		part_stat_lock();
		update_io_ticks(req->part, jiffies, false);
		part_stat_local_inc(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

@@ -1330,11 +1333,6 @@ void blk_execute_rq_nowait(struct request *rq, bool at_head)

	blk_account_io_start(rq);

	/*
	 * As plugging can be enabled for passthrough requests on a zoned
	 * device, directly accessing the plug instead of using blk_mq_plug()
	 * should not have any consequences.
	 */
	if (current->plug && !at_head) {
		blk_add_rq_to_plug(current->plug, rq);
		return;

@@ -1921,19 +1919,6 @@ static void blk_mq_handle_dev_resource(struct request *rq,
	__blk_mq_requeue_request(rq);
}

static void blk_mq_handle_zone_resource(struct request *rq,
					struct list_head *zone_list)
{
	/*
	 * If we end up here it is because we cannot dispatch a request to a
	 * specific zone due to LLD level zone-write locking or other zone
	 * related resource not being available. In this case, set the request
	 * aside in zone_list for retrying it later.
	 */
	list_add(&rq->queuelist, zone_list);
	__blk_mq_requeue_request(rq);
}

enum prep_dispatch {
	PREP_DISPATCH_OK,
	PREP_DISPATCH_NO_TAG,

@@ -2019,7 +2004,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
	struct request *rq;
	int queued;
	blk_status_t ret = BLK_STS_OK;
	LIST_HEAD(zone_list);
	bool needs_resource = false;

	if (list_empty(list))

@@ -2061,23 +2045,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
		case BLK_STS_DEV_RESOURCE:
			blk_mq_handle_dev_resource(rq, list);
			goto out;
		case BLK_STS_ZONE_RESOURCE:
			/*
			 * Move the request to zone_list and keep going through
			 * the dispatch list to find more requests the drive can
			 * accept.
			 */
			blk_mq_handle_zone_resource(rq, &zone_list);
			needs_resource = true;
			break;
		default:
			blk_mq_end_request(rq, ret);
		}
	} while (!list_empty(list));
out:
	if (!list_empty(&zone_list))
		list_splice_tail_init(&zone_list, list);

	/* If we didn't flush the entire list, we could have told the driver
	 * there was more coming, but that turned out to be a lie.
	 */

@@ -2163,6 +2135,15 @@ static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
	return cpu;
}

/*
 * ->next_cpu is always calculated from hctx->cpumask, so simply use
 * it for speeding up the check
 */
static bool blk_mq_hctx_empty_cpumask(struct blk_mq_hw_ctx *hctx)
{
	return hctx->next_cpu >= nr_cpu_ids;
}

/*
 * It'd be great if the workqueue API had a way to pass
 * in a mask and had some smarts for more clever placement.

@@ -2174,7 +2155,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
	bool tried = false;
	int next_cpu = hctx->next_cpu;

	if (hctx->queue->nr_hw_queues == 1)
	/* Switch to unbound if no allowable CPUs in this hctx */
	if (hctx->queue->nr_hw_queues == 1 || blk_mq_hctx_empty_cpumask(hctx))
		return WORK_CPU_UNBOUND;

	if (--hctx->next_cpu_batch <= 0) {

@@ -2948,22 +2930,37 @@ static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
void blk_mq_submit_bio(struct bio *bio)
{
	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
	struct blk_plug *plug = blk_mq_plug(bio);
	struct blk_plug *plug = current->plug;
	const int is_sync = op_is_sync(bio->bi_opf);
	struct blk_mq_hw_ctx *hctx;
	unsigned int nr_segs = 1;
	struct request *rq;
	blk_status_t ret;

	/*
	 * If the plug has a cached request for this queue, try to use it.
	 */
	rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);

	/*
	 * A BIO that was released from a zone write plug has already been
	 * through the preparation in this function, already holds a reference
	 * on the queue usage counter, and is the only write BIO in-flight for
	 * the target zone. Go straight to preparing a request for it.
	 */
	if (bio_zone_write_plugging(bio)) {
		nr_segs = bio->__bi_nr_segments;
		if (rq)
			blk_queue_exit(q);
		goto new_request;
	}

	bio = blk_queue_bounce(bio, q);

	/*
	 * If the plug has a cached request for this queue, try use it.
	 *
	 * The cached request already holds a q_usage_counter reference and we
	 * don't have to acquire a new one if we use it.
	 */
	rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf);
	if (!rq) {
		if (unlikely(bio_queue_enter(bio)))
			return;

@@ -2980,6 +2977,10 @@ void blk_mq_submit_bio(struct bio *bio)
	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
		goto queue_exit;

	if (blk_queue_is_zoned(q) && blk_zone_plug_bio(bio, nr_segs))
		goto queue_exit;

new_request:
	if (!rq) {
		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
		if (unlikely(!rq))

@@ -3002,6 +3003,9 @@ void blk_mq_submit_bio(struct bio *bio)
		return;
	}

	if (bio_zone_write_plugging(bio))
		blk_zone_write_plug_init_request(rq);

	if (op_is_flush(bio->bi_opf) && blk_insert_flush(rq))
		return;

@@ -3483,14 +3487,30 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
	return data.has_rq;
}

static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu,
		struct blk_mq_hw_ctx *hctx)
static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
		unsigned int this_cpu)
{
	if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu)
		return false;
	if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids)
		return false;
	return true;
	enum hctx_type type = hctx->type;
	int cpu;

	/*
	 * hctx->cpumask has to rule out isolated CPUs, but userspace still
	 * might submit IOs on these isolated CPUs, so use the queue map to
	 * check if all CPUs mapped to this hctx are offline
	 */
	for_each_online_cpu(cpu) {
		struct blk_mq_hw_ctx *h = blk_mq_map_queue_type(hctx->queue,
				type, cpu);

		if (h != hctx)
			continue;

		/* this hctx has at least one online CPU */
		if (this_cpu != cpu)
			return true;
	}

	return false;
}

static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)

@@ -3498,8 +3518,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
	struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
			struct blk_mq_hw_ctx, cpuhp_online);

	if (!cpumask_test_cpu(cpu, hctx->cpumask) ||
	    !blk_mq_last_cpu_in_hctx(cpu, hctx))
	if (blk_mq_hctx_has_online_cpu(hctx, cpu))
		return 0;

	/*

@@ -3907,6 +3926,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
	}

	queue_for_each_hw_ctx(q, hctx, i) {
		int cpu;

		/*
		 * If no software queues are mapped to this hardware queue,
		 * disable it and free the request entries.

@@ -3933,6 +3954,15 @@ static void blk_mq_map_swqueue(struct request_queue *q)
		 */
		sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx);

		/*
		 * Rule out isolated CPUs from hctx->cpumask to avoid
		 * running block kworker on isolated CPUs
		 */
		for_each_cpu(cpu, hctx->cpumask) {
			if (cpu_is_isolated(cpu))
				cpumask_clear_cpu(cpu, hctx->cpumask);
		}

		/*
		 * Initialize batch roundrobin counts
		 */
block/blk-mq.h

@@ -365,37 +365,6 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
		qmap->mq_map[cpu] = 0;
}

/*
 * blk_mq_plug() - Get caller context plug
 * @bio : the bio being submitted by the caller context
 *
 * Plugging, by design, may delay the insertion of BIOs into the elevator in
 * order to increase BIO merging opportunities. This however can cause BIO
 * insertion order to change from the order in which submit_bio() is being
 * executed in the case of multiple contexts concurrently issuing BIOs to a
 * device, even if these context are synchronized to tightly control BIO issuing
 * order. While this is not a problem with regular block devices, this ordering
 * change can cause write BIO failures with zoned block devices as these
 * require sequential write patterns to zones. Prevent this from happening by
 * ignoring the plug state of a BIO issuing context if it is for a zoned block
 * device and the BIO to plug is a write operation.
 *
 * Return current->plug if the bio can be plugged and NULL otherwise
 */
static inline struct blk_plug *blk_mq_plug(struct bio *bio)
{
	/* Zoned block device write operation case: do not plug the BIO */
	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
	    bdev_op_is_zoned_write(bio->bi_bdev, bio_op(bio)))
		return NULL;

	/*
	 * For regular block devices or read operations, use the context plug
	 * which may be NULL if blk_start_plug() was not executed.
	 */
	return current->plug;
}

/* Free all requests on the list */
static inline void blk_mq_free_requests(struct list_head *list)
{
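The removed blk_mq_plug() helper existed to bypass the per-task plug for zoned writes so that plugging could not reorder them. With zone write plugging enforcing per-zone ordering, submitters can simply use the ordinary plug again. A minimal sketch of that usage, assuming a hypothetical caller with an array of prepared bios (not code from this series):

/* Sketch: with zone write plugging, a submitter no longer needs to avoid
 * the per-task plug for zoned writes; per-zone ordering is handled by the
 * zone write plug rather than by bypassing blk_plug. */
static void example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);
}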
block/blk-settings.c

@@ -411,24 +411,32 @@ EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
 * blk_queue_max_zone_append_sectors - set max sectors for a single zone append
 * @q:  the request queue for the device
 * @max_zone_append_sectors: maximum number of sectors to write per command
 *
 * Sets the maximum number of sectors allowed for zone append commands. If
 * Specifying 0 for @max_zone_append_sectors indicates that the queue does
 * not natively support zone append operations and that the block layer must
 * emulate these operations using regular writes.
 **/
void blk_queue_max_zone_append_sectors(struct request_queue *q,
		unsigned int max_zone_append_sectors)
{
	unsigned int max_sectors;
	unsigned int max_sectors = 0;

	if (WARN_ON(!blk_queue_is_zoned(q)))
		return;

	max_sectors = min(q->limits.max_hw_sectors, max_zone_append_sectors);
	max_sectors = min(q->limits.chunk_sectors, max_sectors);
	if (max_zone_append_sectors) {
		max_sectors = min(q->limits.max_hw_sectors,
				  max_zone_append_sectors);
		max_sectors = min(q->limits.chunk_sectors, max_sectors);

	/*
	 * Signal eventual driver bugs resulting in the max_zone_append sectors limit
	 * being 0 due to a 0 argument, the chunk_sectors limit (zone size) not set,
	 * or the max_hw_sectors limit not set.
	 */
	WARN_ON(!max_sectors);
		/*
		 * Signal eventual driver bugs resulting in the max_zone_append
		 * sectors limit being 0 due to the chunk_sectors limit (zone
		 * size) not set or the max_hw_sectors limit not set.
		 */
		WARN_ON_ONCE(!max_sectors);
	}

	q->limits.max_zone_append_sectors = max_sectors;
}

@@ -755,8 +763,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
	t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
					  b->max_write_zeroes_sectors);
	t->max_zone_append_sectors = min(t->max_zone_append_sectors,
					 b->max_zone_append_sectors);
	t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t),
					 queue_limits_max_zone_append_sectors(b));
	t->bounce = max(t->bounce, b->bounce);

	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,

@@ -1043,22 +1051,6 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
}
EXPORT_SYMBOL_GPL(blk_queue_write_cache);

/**
 * blk_queue_required_elevator_features - Set a queue required elevator features
 * @q:		the request queue for the target device
 * @features:	Required elevator features OR'ed together
 *
 * Tell the block layer that for the device controlled through @q, only the
 * only elevators that can be used are those that implement at least the set of
 * features specified by @features.
 */
void blk_queue_required_elevator_features(struct request_queue *q,
					  unsigned int features)
{
	q->required_elevator_features = features;
}
EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);

/**
 * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
 * @q:	the request queue for the device
block/blk-stat.c

@@ -57,9 +57,6 @@ void blk_stat_add(struct request *rq, u64 now)

	value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;

	if (req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE)
		blk_throtl_stat_add(rq, value);

	rcu_read_lock();
	cpu = get_cpu();
	list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
block/blk-sysfs.c

@@ -224,7 +224,7 @@ static ssize_t queue_zone_write_granularity_show(struct request_queue *q,

static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
{
	unsigned long long max_sectors = q->limits.max_zone_append_sectors;
	unsigned long long max_sectors = queue_max_zone_append_sectors(q);

	return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
}

@@ -516,10 +516,6 @@ QUEUE_RW_ENTRY(queue_io_timeout, "io_timeout");
QUEUE_RO_ENTRY(queue_virt_boundary_mask, "virt_boundary_mask");
QUEUE_RO_ENTRY(queue_dma_alignment, "dma_alignment");

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
QUEUE_RW_ENTRY(blk_throtl_sample_time, "throttle_sample_time");
#endif

/* legacy alias for logical_block_size: */
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
	.attr = {.name = "hw_sector_size", .mode = 0444 },

@@ -640,9 +636,6 @@ static struct attribute *queue_attrs[] = {
	&queue_fua_entry.attr,
	&queue_dax_entry.attr,
	&queue_poll_delay_entry.attr,
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
	&blk_throtl_sample_time_entry.attr,
#endif
	&queue_virt_boundary_mask_entry.attr,
	&queue_dma_alignment_entry.attr,
	NULL,

@@ -814,7 +807,6 @@ int blk_register_queue(struct gendisk *disk)

	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
	wbt_enable_default(disk);
	blk_throtl_register(disk);

	/* Now everything is ready and send out KOBJ_ADD uevent */
	kobject_uevent(&disk->queue_kobj, KOBJ_ADD);
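queue_zone_append_max_show() above backs the queue/zone_append_max_bytes sysfs attribute; with zone write plugging able to emulate zone append, a zoned device should now advertise a non-zero limit even without native support. A quick userspace check, as an illustrative sketch only (the device name is an example):

/* Sketch: print the advertised zone-append limit of a request queue.
 * Reads /sys/block/<disk>/queue/zone_append_max_bytes (value in bytes). */
#include <stdio.h>

int main(void)
{
	unsigned long long bytes = 0;
	FILE *f = fopen("/sys/block/nvme0n1/queue/zone_append_max_bytes", "r");

	if (!f) {
		perror("open");
		return 1;
	}
	if (fscanf(f, "%llu", &bytes) != 1)
		bytes = 0;
	fclose(f);
	printf("max zone append: %llu bytes\n", bytes);
	return 0;
}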
block/blk-throttle.c (1019 lines changed; diff not shown here)
block/blk-throttle.h

@@ -58,12 +58,6 @@ enum tg_state_flags {
	THROTL_TG_CANCELING	= 1 << 2,	/* starts to cancel bio */
};

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

@@ -102,14 +96,14 @@ struct throtl_grp {
	bool has_rules_iops[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	uint64_t bps[2];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];
	uint64_t bps_conf[2];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	unsigned int iops[2];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];
	unsigned int iops_conf[2];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];

@@ -132,22 +126,10 @@ struct throtl_grp {

	unsigned long last_check_time;

	unsigned long latency_target; /* us */
	unsigned long latency_target_conf; /* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time; /* ns / 1024 */
	unsigned long checked_last_finish_time; /* ns / 1024 */
	unsigned long avg_idletime; /* ns / 1024 */
	unsigned long idletime_threshold; /* us */
	unsigned long idletime_threshold_conf; /* us */

	unsigned int bio_cnt; /* total bios */
	unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};

@@ -168,23 +150,33 @@ static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline int blk_throtl_init(struct gendisk *disk) { return 0; }
static inline void blk_throtl_exit(struct gendisk *disk) { }
static inline void blk_throtl_register(struct gendisk *disk) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
static inline void blk_throtl_cancel_bios(struct gendisk *disk) { }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct gendisk *disk);
void blk_throtl_exit(struct gendisk *disk);
void blk_throtl_register(struct gendisk *disk);
bool __blk_throtl_bio(struct bio *bio);
void blk_throtl_cancel_bios(struct gendisk *disk);

static inline bool blk_throtl_activated(struct request_queue *q)
{
	return q->td != NULL;
}

static inline bool blk_should_throtl(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);
	struct throtl_grp *tg;
	int rw = bio_data_dir(bio);

	/*
	 * This is called under bio_queue_enter(), and it's synchronized with
	 * the activation of blk-throtl, which is protected by
	 * blk_mq_freeze_queue().
	 */
	if (!blk_throtl_activated(bio->bi_bdev->bd_queue))
		return false;

	tg = blkg_to_tg(bio->bi_blkg);
	if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
			bio_set_flag(bio, BIO_CGROUP_ACCT);
block/blk-zoned.c (1508 lines changed; diff not shown here)
block/blk.h

@@ -38,6 +38,7 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
void submit_bio_noacct_nocheck(struct bio *bio);
void bio_await_chain(struct bio *bio);

static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
{

@@ -269,6 +270,14 @@ static inline void bio_integrity_free(struct bio *bio)
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);

enum bio_merge_status {
	BIO_MERGE_OK,
	BIO_MERGE_NONE,
	BIO_MERGE_FAILED,
};

enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,

@@ -357,6 +366,7 @@ static inline bool blk_do_io_stat(struct request *rq)
}

void update_io_ticks(struct block_device *part, unsigned long now, bool end);
unsigned int part_in_flight(struct block_device *part);

static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{

@@ -378,17 +388,6 @@ static inline void ioc_clear_queue(struct request_queue *q)
}
#endif /* CONFIG_BLK_ICQ */

#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
extern ssize_t blk_throtl_sample_time_store(struct request_queue *q,
	const char *page, size_t count);
extern void blk_throtl_bio_endio(struct bio *bio);
extern void blk_throtl_stat_add(struct request *rq, u64 time);
#else
static inline void blk_throtl_bio_endio(struct bio *bio) { }
static inline void blk_throtl_stat_add(struct request *rq, u64 time) { }
#endif

struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);

static inline bool blk_queue_may_bounce(struct request_queue *q)

@@ -407,13 +406,85 @@ static inline struct bio *blk_queue_bounce(struct bio *bio,
}

#ifdef CONFIG_BLK_DEV_ZONED
void disk_free_zone_bitmaps(struct gendisk *disk);
void disk_init_zone_resources(struct gendisk *disk);
void disk_free_zone_resources(struct gendisk *disk);
static inline bool bio_zone_write_plugging(struct bio *bio)
{
	return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
}
static inline bool bio_is_zone_append(struct bio *bio)
{
	return bio_op(bio) == REQ_OP_ZONE_APPEND ||
		bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
}
void blk_zone_write_plug_bio_merged(struct bio *bio);
void blk_zone_write_plug_init_request(struct request *rq);
static inline void blk_zone_update_request_bio(struct request *rq,
					       struct bio *bio)
{
	/*
	 * For zone append requests, the request sector indicates the location
	 * at which the BIO data was written. Return this value to the BIO
	 * issuer through the BIO iter sector.
	 * For plugged zone writes, which include emulated zone append, we need
	 * the original BIO sector so that blk_zone_write_plug_bio_endio() can
	 * lookup the zone write plug.
	 */
	if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio))
		bio->bi_iter.bi_sector = rq->__sector;
}
void blk_zone_write_plug_bio_endio(struct bio *bio);
static inline void blk_zone_bio_endio(struct bio *bio)
{
	/*
	 * For write BIOs to zoned devices, signal the completion of the BIO so
	 * that the next write BIO can be submitted by zone write plugging.
	 */
	if (bio_zone_write_plugging(bio))
		blk_zone_write_plug_bio_endio(bio);
}

void blk_zone_write_plug_finish_request(struct request *rq);
static inline void blk_zone_finish_request(struct request *rq)
{
	if (rq->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_finish_request(rq);
}
int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
		unsigned long arg);
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
		unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
static inline void disk_free_zone_bitmaps(struct gendisk *disk) {}
static inline void disk_init_zone_resources(struct gendisk *disk)
{
}
static inline void disk_free_zone_resources(struct gendisk *disk)
{
}
static inline bool bio_zone_write_plugging(struct bio *bio)
{
	return false;
}
static inline bool bio_is_zone_append(struct bio *bio)
{
	return false;
}
static inline void blk_zone_write_plug_bio_merged(struct bio *bio)
{
}
static inline void blk_zone_write_plug_init_request(struct request *rq)
{
}
static inline void blk_zone_update_request_bio(struct request *rq,
					       struct bio *bio)
{
}
static inline void blk_zone_bio_endio(struct bio *bio)
{
}
static inline void blk_zone_finish_request(struct request *rq)
{
}
static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
		unsigned int cmd, unsigned long arg)
{
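blk_zone_update_request_bio() above is what hands the actual write location back to the issuer: once a zone append (native or emulated through zone write plugging) completes, bi_sector of the bio holds the sector where the data landed. A hypothetical in-kernel submitter would consume that roughly as follows (illustrative sketch under stated assumptions, not code from this series):

/* Hypothetical sketch of a zone-append issuer: the completion handler
 * reads bio->bi_iter.bi_sector to learn where the data was written. */
static void example_zone_append_end_io(struct bio *bio)
{
	sector_t written_sector = bio->bi_iter.bi_sector;

	/* record the on-disk location chosen by the device or the emulation */
	pr_debug("zone append landed at sector %llu, status %d\n",
		 (unsigned long long)written_sector,
		 blk_status_to_errno(bio->bi_status));
	bio_put(bio);
}

static void example_zone_append(struct block_device *bdev, sector_t zone_start,
				struct page *page, unsigned int len)
{
	/* len must stay within the queue's zone-append limit */
	struct bio *bio = bio_alloc(bdev, 1, REQ_OP_ZONE_APPEND, GFP_NOIO);

	bio->bi_iter.bi_sector = zone_start;	/* must be the zone start */
	__bio_add_page(bio, page, len, 0);
	bio->bi_end_io = example_zone_append_end_io;
	submit_bio(bio);
}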
block/elevator.c

@@ -83,13 +83,6 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
}
EXPORT_SYMBOL(elv_bio_merge_ok);

static inline bool elv_support_features(struct request_queue *q,
		const struct elevator_type *e)
{
	return (q->required_elevator_features & e->elevator_features) ==
		q->required_elevator_features;
}

/**
 * elevator_match - Check whether @e's name or alias matches @name
 * @e: Scheduler to test

@@ -120,7 +113,7 @@ static struct elevator_type *elevator_find_get(struct request_queue *q,

	spin_lock(&elv_list_lock);
	e = __elevator_find(name);
	if (e && (!elv_support_features(q, e) || !elevator_tryget(e)))
	if (e && (!elevator_tryget(e)))
		e = NULL;
	spin_unlock(&elv_list_lock);
	return e;

@@ -580,34 +573,8 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
}

/*
 * Get the first elevator providing the features required by the request queue.
 * Default to "none" if no matching elevator is found.
 */
static struct elevator_type *elevator_get_by_features(struct request_queue *q)
{
	struct elevator_type *e, *found = NULL;

	spin_lock(&elv_list_lock);

	list_for_each_entry(e, &elv_list, list) {
		if (elv_support_features(q, e)) {
			found = e;
			break;
		}
	}

	if (found && !elevator_tryget(found))
		found = NULL;

	spin_unlock(&elv_list_lock);
	return found;
}

/*
 * For a device queue that has no required features, use the default elevator
 * settings. Otherwise, use the first elevator available matching the required
 * features. If no suitable elevator is find or if the chosen elevator
 * initialization fails, fall back to the "none" elevator (no elevator).
 * Use the default elevator settings. If the chosen elevator initialization
 * fails, fall back to the "none" elevator (no elevator).
 */
void elevator_init_mq(struct request_queue *q)
{

@@ -622,10 +589,7 @@ void elevator_init_mq(struct request_queue *q)
	if (unlikely(q->elevator))
		return;

	if (!q->required_elevator_features)
		e = elevator_get_default(q);
	else
		e = elevator_get_by_features(q);
	e = elevator_get_default(q);
	if (!e)
		return;

@@ -781,7 +745,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
	list_for_each_entry(e, &elv_list, list) {
		if (e == cur)
			len += sprintf(name+len, "[%s] ", e->elevator_name);
		else if (elv_support_features(q, e))
		else
			len += sprintf(name+len, "%s ", e->elevator_name);
	}
	spin_unlock(&elv_list_lock);
block/elevator.h

@@ -74,7 +74,6 @@ struct elevator_type
	struct elv_fs_entry *elevator_attrs;
	const char *elevator_name;
	const char *elevator_alias;
	const unsigned int elevator_features;
	struct module *elevator_owner;
#ifdef CONFIG_BLK_DEBUG_FS
	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
block/fops.c

@@ -44,18 +44,15 @@ static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
#define DIO_INLINE_BIO_VECS 4

static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
		struct iov_iter *iter, unsigned int nr_pages)
		struct iov_iter *iter, struct block_device *bdev,
		unsigned int nr_pages)
{
	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
	loff_t pos = iocb->ki_pos;
	bool should_dirty = false;
	struct bio bio;
	ssize_t ret;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (nr_pages <= DIO_INLINE_BIO_VECS)
		vecs = inline_vecs;
	else {

@@ -161,9 +158,8 @@ static void blkdev_bio_end_io(struct bio *bio)
}

static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
		unsigned int nr_pages)
		struct block_device *bdev, unsigned int nr_pages)
{
	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
	struct blk_plug plug;
	struct blkdev_dio *dio;
	struct bio *bio;

@@ -172,9 +168,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
		opf |= REQ_ALLOC_CACHE;
	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,

@@ -302,9 +295,9 @@ static void blkdev_bio_end_io_async(struct bio *bio)

static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
					struct iov_iter *iter,
					struct block_device *bdev,
					unsigned int nr_pages)
{
	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
	bool is_read = iov_iter_rw(iter) == READ;
	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
	struct blkdev_dio *dio;

@@ -312,9 +305,6 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
	loff_t pos = iocb->ki_pos;
	int ret = 0;

	if (blkdev_dio_unaligned(bdev, pos, iter))
		return -EINVAL;

	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
		opf |= REQ_ALLOC_CACHE;
	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,

@@ -368,18 +358,23 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,

static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
	unsigned int nr_pages;

	if (!iov_iter_count(iter))
		return 0;

	if (blkdev_dio_unaligned(bdev, iocb->ki_pos, iter))
		return -EINVAL;

	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
	if (likely(nr_pages <= BIO_MAX_VECS)) {
		if (is_sync_kiocb(iocb))
			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
		return __blkdev_direct_IO_async(iocb, iter, nr_pages);
			return __blkdev_direct_IO_simple(iocb, iter, bdev,
							nr_pages);
		return __blkdev_direct_IO_async(iocb, iter, bdev, nr_pages);
	}
	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
	return __blkdev_direct_IO(iocb, iter, bdev, bio_max_segs(nr_pages));
}

static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,

@@ -390,7 +385,7 @@ static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,

	iomap->bdev = bdev;
	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
	if (iomap->offset >= isize)
	if (offset >= isize)
		return -EIO;
	iomap->type = IOMAP_MAPPED;
	iomap->addr = iomap->offset;
@@ -118,7 +118,7 @@ static void part_stat_read_all(struct block_device *part,
}
}

static unsigned int part_in_flight(struct block_device *part)
unsigned int part_in_flight(struct block_device *part)
{
unsigned int inflight = 0;
int cpu;

@@ -345,9 +345,7 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode)
struct file *file;
int ret = 0;

if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
if (!disk_has_partscan(disk))
return -EINVAL;
if (disk->open_partitions)
return -EBUSY;

@@ -503,8 +501,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
goto out_unregister_bdi;

/* Make sure the first partition scan will be proceed */
if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
!test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
if (get_capacity(disk) && disk_has_partscan(disk))
set_bit(GD_NEED_PART_SCAN, &disk->state);

bdev_add(disk->part0, ddev->devt);

@@ -954,15 +951,10 @@ ssize_t part_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct block_device *bdev = dev_to_bdev(dev);
struct request_queue *q = bdev_get_queue(bdev);
struct disk_stats stat;
unsigned int inflight;

if (queue_is_mq(q))
inflight = blk_mq_in_flight(q, bdev);
else
inflight = part_in_flight(bdev);

inflight = part_in_flight(bdev);
if (inflight) {
part_stat_lock();
update_io_ticks(bdev, jiffies, true);

@@ -1047,6 +1039,12 @@ static ssize_t diskseq_show(struct device *dev,
return sprintf(buf, "%llu\n", disk->diskseq);
}

static ssize_t partscan_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
}

static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);

@@ -1060,6 +1058,7 @@ static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);

#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,

@@ -1106,6 +1105,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_events_async.attr,
&dev_attr_events_poll_msecs.attr,
&dev_attr_diskseq.attr,
&dev_attr_partscan.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif

@@ -1182,7 +1182,7 @@ static void disk_release(struct device *dev)

disk_release_events(disk);
kfree(disk->random);
disk_free_zone_bitmaps(disk);
disk_free_zone_resources(disk);
xa_destroy(&disk->part_tbl);

disk->queue->disk = NULL;

@@ -1251,11 +1251,8 @@ static int diskstats_show(struct seq_file *seqf, void *v)
xa_for_each(&gp->part_tbl, idx, hd) {
if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
continue;
if (queue_is_mq(gp->queue))
inflight = blk_mq_in_flight(gp->queue, hd);
else
inflight = part_in_flight(hd);

inflight = part_in_flight(hd);
if (inflight) {
part_stat_lock();
update_io_ticks(hd, jiffies, true);

@@ -1364,6 +1361,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
if (blkcg_init_disk(disk))
goto out_erase_part0;

disk_init_zone_resources(disk);
rand_initialize_disk(disk);
disk_to_dev(disk)->class = &block_class;
disk_to_dev(disk)->type = &disk_type;
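
The hunks above add the partscan attribute backed by disk_has_partscan() and wire it into disk_attrs[]. Not part of the patch, but for orientation: userspace reads the new file like any other disk sysfs attribute. A minimal sketch in C, with "sda" as a placeholder device name:

/*
 * Illustrative only: reads the partscan attribute of one disk.
 * "sda" is a placeholder device name.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4] = "";
	int fd = open("/sys/block/sda/partscan", O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (read(fd, buf, sizeof(buf) - 1) < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	close(fd);
	/* The kernel prints "0" or "1" followed by a newline. */
	printf("partition scanning enabled: %c\n", buf[0]);
	return 0;
}
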
@@ -33,7 +33,7 @@ static int blkpg_do_ioctl(struct block_device *bdev,
if (op == BLKPG_DEL_PARTITION)
return bdev_del_partition(disk, p.pno);

if (p.start < 0 || p.length <= 0 || p.start + p.length < 0)
if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start)
return -EINVAL;
/* Check that the partition is aligned to the block size */
if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev)))

@@ -95,9 +95,12 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
uint64_t range[2];
uint64_t start, len, end;
unsigned int bs_mask = bdev_logical_block_size(bdev) - 1;
struct inode *inode = bdev->bd_inode;
uint64_t range[2], start, len, end;
struct bio *prev = NULL, *bio;
sector_t sector, nr_sects;
struct blk_plug plug;
int err;

if (!(mode & BLK_OPEN_WRITE))

@@ -105,6 +108,8 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,

if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
if (bdev_read_only(bdev))
return -EPERM;

if (copy_from_user(range, (void __user *)arg, sizeof(range)))
return -EFAULT;

@@ -112,9 +117,9 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
start = range[0];
len = range[1];

if (start & 511)
if (!len)
return -EINVAL;
if (len & 511)
if ((start | len) & bs_mask)
return -EINVAL;

if (check_add_overflow(start, len, &end) ||

@@ -125,7 +130,32 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
goto fail;
err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);

sector = start >> SECTOR_SHIFT;
nr_sects = len >> SECTOR_SHIFT;

blk_start_plug(&plug);
while (1) {
if (fatal_signal_pending(current)) {
if (prev)
bio_await_chain(prev);
err = -EINTR;
goto out_unplug;
}
bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
GFP_KERNEL);
if (!bio)
break;
prev = bio_chain_and_submit(prev, bio);
}
if (prev) {
err = submit_bio_wait(prev);
if (err == -EOPNOTSUPP)
err = 0;
bio_put(prev);
}
out_unplug:
blk_finish_plug(&plug);
fail:
filemap_invalidate_unlock(inode->i_mapping);
return err;
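
The rewritten blk_ioctl_discard() above builds the discard as a chain of bios with blk_alloc_discard_bio() and bio_chain_and_submit(), and checks fatal_signal_pending() between allocations, so a long-running BLKDISCARD can be interrupted. The userspace side of the ioctl is unchanged; the following sketch (not part of the patch, with a placeholder device path and range) shows the uint64_t byte-range argument the handler copies in above. Both values must be aligned to the device's logical block size, as the bs_mask check enforces.

/*
 * Illustrative only: issues BLKDISCARD for a byte range on a block device.
 */
#include <fcntl.h>
#include <linux/fs.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	uint64_t range[2] = { 0, 1ULL << 20 };	/* start and length, in bytes */
	int fd = open("/dev/sdX", O_WRONLY);	/* placeholder device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKDISCARD, range) < 0)
		perror("BLKDISCARD");
	close(fd);
	return 0;
}
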
@ -102,7 +102,6 @@ struct deadline_data {
|
||||
int prio_aging_expire;
|
||||
|
||||
spinlock_t lock;
|
||||
spinlock_t zone_lock;
|
||||
};
|
||||
|
||||
/* Maps an I/O priority class to a deadline scheduler priority. */
|
||||
@ -129,36 +128,7 @@ static u8 dd_rq_ioclass(struct request *rq)
|
||||
}
|
||||
|
||||
/*
|
||||
* get the request before `rq' in sector-sorted order
|
||||
*/
|
||||
static inline struct request *
|
||||
deadline_earlier_request(struct request *rq)
|
||||
{
|
||||
struct rb_node *node = rb_prev(&rq->rb_node);
|
||||
|
||||
if (node)
|
||||
return rb_entry_rq(node);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* get the request after `rq' in sector-sorted order
|
||||
*/
|
||||
static inline struct request *
|
||||
deadline_latter_request(struct request *rq)
|
||||
{
|
||||
struct rb_node *node = rb_next(&rq->rb_node);
|
||||
|
||||
if (node)
|
||||
return rb_entry_rq(node);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the first request for which blk_rq_pos() >= @pos. For zoned devices,
|
||||
* return the first request after the start of the zone containing @pos.
|
||||
* Return the first request for which blk_rq_pos() >= @pos.
|
||||
*/
|
||||
static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
|
||||
enum dd_data_dir data_dir, sector_t pos)
|
||||
@ -170,14 +140,6 @@ static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
|
||||
return NULL;
|
||||
|
||||
rq = rb_entry_rq(node);
|
||||
/*
|
||||
* A zoned write may have been requeued with a starting position that
|
||||
* is below that of the most recently dispatched request. Hence, for
|
||||
* zoned writes, start searching from the start of a zone.
|
||||
*/
|
||||
if (blk_rq_is_seq_zoned_write(rq))
|
||||
pos = round_down(pos, rq->q->limits.chunk_sectors);
|
||||
|
||||
while (node) {
|
||||
rq = rb_entry_rq(node);
|
||||
if (blk_rq_pos(rq) >= pos) {
|
||||
@ -308,36 +270,6 @@ static inline bool deadline_check_fifo(struct dd_per_prio *per_prio,
|
||||
return time_is_before_eq_jiffies((unsigned long)rq->fifo_time);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if rq has a sequential request preceding it.
|
||||
*/
|
||||
static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq)
|
||||
{
|
||||
struct request *prev = deadline_earlier_request(rq);
|
||||
|
||||
if (!prev)
|
||||
return false;
|
||||
|
||||
return blk_rq_pos(prev) + blk_rq_sectors(prev) == blk_rq_pos(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip all write requests that are sequential from @rq, even if we cross
|
||||
* a zone boundary.
|
||||
*/
|
||||
static struct request *deadline_skip_seq_writes(struct deadline_data *dd,
|
||||
struct request *rq)
|
||||
{
|
||||
sector_t pos = blk_rq_pos(rq);
|
||||
|
||||
do {
|
||||
pos += blk_rq_sectors(rq);
|
||||
rq = deadline_latter_request(rq);
|
||||
} while (rq && blk_rq_pos(rq) == pos);
|
||||
|
||||
return rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* For the specified data direction, return the next request to
|
||||
* dispatch using arrival ordered lists.
|
||||
@ -346,40 +278,10 @@ static struct request *
|
||||
deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
|
||||
enum dd_data_dir data_dir)
|
||||
{
|
||||
struct request *rq, *rb_rq, *next;
|
||||
unsigned long flags;
|
||||
|
||||
if (list_empty(&per_prio->fifo_list[data_dir]))
|
||||
return NULL;
|
||||
|
||||
rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
|
||||
if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
|
||||
return rq;
|
||||
|
||||
/*
|
||||
* Look for a write request that can be dispatched, that is one with
|
||||
* an unlocked target zone. For some HDDs, breaking a sequential
|
||||
* write stream can lead to lower throughput, so make sure to preserve
|
||||
* sequential write streams, even if that stream crosses into the next
|
||||
* zones and these zones are unlocked.
|
||||
*/
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
list_for_each_entry_safe(rq, next, &per_prio->fifo_list[DD_WRITE],
|
||||
queuelist) {
|
||||
/* Check whether a prior request exists for the same zone. */
|
||||
rb_rq = deadline_from_pos(per_prio, data_dir, blk_rq_pos(rq));
|
||||
if (rb_rq && blk_rq_pos(rb_rq) < blk_rq_pos(rq))
|
||||
rq = rb_rq;
|
||||
if (blk_req_can_dispatch_to_zone(rq) &&
|
||||
(blk_queue_nonrot(rq->q) ||
|
||||
!deadline_is_seq_write(dd, rq)))
|
||||
goto out;
|
||||
}
|
||||
rq = NULL;
|
||||
out:
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
|
||||
return rq;
|
||||
return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -390,36 +292,8 @@ static struct request *
|
||||
deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
|
||||
enum dd_data_dir data_dir)
|
||||
{
|
||||
struct request *rq;
|
||||
unsigned long flags;
|
||||
|
||||
rq = deadline_from_pos(per_prio, data_dir,
|
||||
per_prio->latest_pos[data_dir]);
|
||||
if (!rq)
|
||||
return NULL;
|
||||
|
||||
if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
|
||||
return rq;
|
||||
|
||||
/*
|
||||
* Look for a write request that can be dispatched, that is one with
|
||||
* an unlocked target zone. For some HDDs, breaking a sequential
|
||||
* write stream can lead to lower throughput, so make sure to preserve
|
||||
* sequential write streams, even if that stream crosses into the next
|
||||
* zones and these zones are unlocked.
|
||||
*/
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
while (rq) {
|
||||
if (blk_req_can_dispatch_to_zone(rq))
|
||||
break;
|
||||
if (blk_queue_nonrot(rq->q))
|
||||
rq = deadline_latter_request(rq);
|
||||
else
|
||||
rq = deadline_skip_seq_writes(dd, rq);
|
||||
}
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
|
||||
return rq;
|
||||
return deadline_from_pos(per_prio, data_dir,
|
||||
per_prio->latest_pos[data_dir]);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -525,10 +399,6 @@ dispatch_find_request:
|
||||
rq = next_rq;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a zoned block device, if we only have writes queued and none of
|
||||
* them can be dispatched, rq will be NULL.
|
||||
*/
|
||||
if (!rq)
|
||||
return NULL;
|
||||
|
||||
@ -549,10 +419,6 @@ done:
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
dd->per_prio[prio].latest_pos[data_dir] = blk_rq_pos(rq);
|
||||
dd->per_prio[prio].stats.dispatched++;
|
||||
/*
|
||||
* If the request needs its target zone locked, do it.
|
||||
*/
|
||||
blk_req_zone_write_lock(rq);
|
||||
rq->rq_flags |= RQF_STARTED;
|
||||
return rq;
|
||||
}
|
||||
@ -722,7 +588,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
|
||||
dd->fifo_batch = fifo_batch;
|
||||
dd->prio_aging_expire = prio_aging_expire;
|
||||
spin_lock_init(&dd->lock);
|
||||
spin_lock_init(&dd->zone_lock);
|
||||
|
||||
/* We dispatch from request queue wide instead of hw queue */
|
||||
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
|
||||
@ -804,12 +669,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
|
||||
lockdep_assert_held(&dd->lock);
|
||||
|
||||
/*
|
||||
* This may be a requeue of a write request that has locked its
|
||||
* target zone. If it is the case, this releases the zone lock.
|
||||
*/
|
||||
blk_req_zone_write_unlock(rq);
|
||||
|
||||
prio = ioprio_class_to_prio[ioprio_class];
|
||||
per_prio = &dd->per_prio[prio];
|
||||
if (!rq->elv.priv[0]) {
|
||||
@ -841,18 +700,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
|
||||
*/
|
||||
rq->fifo_time = jiffies + dd->fifo_expire[data_dir];
|
||||
insert_before = &per_prio->fifo_list[data_dir];
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
/*
|
||||
* Insert zoned writes such that requests are sorted by
|
||||
* position per zone.
|
||||
*/
|
||||
if (blk_rq_is_seq_zoned_write(rq)) {
|
||||
struct request *rq2 = deadline_latter_request(rq);
|
||||
|
||||
if (rq2 && blk_rq_zone_no(rq2) == blk_rq_zone_no(rq))
|
||||
insert_before = &rq2->queuelist;
|
||||
}
|
||||
#endif
|
||||
list_add_tail(&rq->queuelist, insert_before);
|
||||
}
|
||||
}
|
||||
@ -887,33 +734,8 @@ static void dd_prepare_request(struct request *rq)
|
||||
rq->elv.priv[0] = NULL;
|
||||
}
|
||||
|
||||
static bool dd_has_write_work(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
|
||||
enum dd_prio p;
|
||||
|
||||
for (p = 0; p <= DD_PRIO_MAX; p++)
|
||||
if (!list_empty_careful(&dd->per_prio[p].fifo_list[DD_WRITE]))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback from inside blk_mq_free_request().
|
||||
*
|
||||
* For zoned block devices, write unlock the target zone of
|
||||
* completed write requests. Do this while holding the zone lock
|
||||
* spinlock so that the zone is never unlocked while deadline_fifo_request()
|
||||
* or deadline_next_request() are executing. This function is called for
|
||||
* all requests, whether or not these requests complete successfully.
|
||||
*
|
||||
* For a zoned block device, __dd_dispatch_request() may have stopped
|
||||
* dispatching requests if all the queued requests are write requests directed
|
||||
* at zones that are already locked due to on-going write requests. To ensure
|
||||
* write request dispatch progress in this case, mark the queue as needing a
|
||||
* restart to ensure that the queue is run again after completion of the
|
||||
* request and zones being unlocked.
|
||||
*/
|
||||
static void dd_finish_request(struct request *rq)
|
||||
{
|
||||
@ -928,21 +750,8 @@ static void dd_finish_request(struct request *rq)
|
||||
* called dd_insert_requests(). Skip requests that bypassed I/O
|
||||
* scheduling. See also blk_mq_request_bypass_insert().
|
||||
*/
|
||||
if (!rq->elv.priv[0])
|
||||
return;
|
||||
|
||||
atomic_inc(&per_prio->stats.completed);
|
||||
|
||||
if (blk_queue_is_zoned(q)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dd->zone_lock, flags);
|
||||
blk_req_zone_write_unlock(rq);
|
||||
spin_unlock_irqrestore(&dd->zone_lock, flags);
|
||||
|
||||
if (dd_has_write_work(rq->mq_hctx))
|
||||
blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
|
||||
}
|
||||
if (rq->elv.priv[0])
|
||||
atomic_inc(&per_prio->stats.completed);
|
||||
}
|
||||
|
||||
static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)
|
||||
@ -1266,7 +1075,6 @@ static struct elevator_type mq_deadline = {
|
||||
.elevator_attrs = deadline_attrs,
|
||||
.elevator_name = "mq-deadline",
|
||||
.elevator_alias = "deadline",
|
||||
.elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
|
||||
.elevator_owner = THIS_MODULE,
|
||||
};
|
||||
MODULE_ALIAS("mq-deadline-iosched");
|
||||
|
@ -70,8 +70,8 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
|
||||
}
|
||||
|
||||
if (*partdef == '(') {
|
||||
int length;
|
||||
char *next = strchr(++partdef, ')');
|
||||
partdef++;
|
||||
char *next = strsep(&partdef, ")");
|
||||
|
||||
if (!next) {
|
||||
pr_warn("cmdline partition format is invalid.");
|
||||
@ -79,11 +79,7 @@ static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
length = min_t(int, next - partdef,
|
||||
sizeof(new_subpart->name) - 1);
|
||||
strscpy(new_subpart->name, partdef, length);
|
||||
|
||||
partdef = ++next;
|
||||
strscpy(new_subpart->name, next, sizeof(new_subpart->name));
|
||||
} else
|
||||
new_subpart->name[0] = '\0';
|
||||
|
||||
@ -117,14 +113,12 @@ static void free_subpart(struct cmdline_parts *parts)
|
||||
}
|
||||
}
|
||||
|
||||
static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
|
||||
static int parse_parts(struct cmdline_parts **parts, char *bdevdef)
|
||||
{
|
||||
int ret = -EINVAL;
|
||||
char *next;
|
||||
int length;
|
||||
struct cmdline_subpart **next_subpart;
|
||||
struct cmdline_parts *newparts;
|
||||
char buf[BDEVNAME_SIZE + 32 + 4];
|
||||
|
||||
*parts = NULL;
|
||||
|
||||
@ -132,28 +126,19 @@ static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
|
||||
if (!newparts)
|
||||
return -ENOMEM;
|
||||
|
||||
next = strchr(bdevdef, ':');
|
||||
next = strsep(&bdevdef, ":");
|
||||
if (!next) {
|
||||
pr_warn("cmdline partition has no block device.");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
|
||||
strscpy(newparts->name, bdevdef, length);
|
||||
strscpy(newparts->name, next, sizeof(newparts->name));
|
||||
newparts->nr_subparts = 0;
|
||||
|
||||
next_subpart = &newparts->subpart;
|
||||
|
||||
while (next && *(++next)) {
|
||||
bdevdef = next;
|
||||
next = strchr(bdevdef, ',');
|
||||
|
||||
length = (!next) ? (sizeof(buf) - 1) :
|
||||
min_t(int, next - bdevdef, sizeof(buf) - 1);
|
||||
|
||||
strscpy(buf, bdevdef, length);
|
||||
|
||||
ret = parse_subpart(next_subpart, buf);
|
||||
while ((next = strsep(&bdevdef, ","))) {
|
||||
ret = parse_subpart(next_subpart, next);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
@ -199,24 +184,17 @@ static int cmdline_parts_parse(struct cmdline_parts **parts,
|
||||
|
||||
*parts = NULL;
|
||||
|
||||
next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
|
||||
pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
next_parts = parts;
|
||||
|
||||
while (next && *pbuf) {
|
||||
next = strchr(pbuf, ';');
|
||||
if (next)
|
||||
*next = '\0';
|
||||
|
||||
ret = parse_parts(next_parts, pbuf);
|
||||
while ((next = strsep(&pbuf, ";"))) {
|
||||
ret = parse_parts(next_parts, next);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
if (next)
|
||||
pbuf = ++next;
|
||||
|
||||
next_parts = &(*next_parts)->next_parts;
|
||||
}
|
||||
|
||||
@ -250,7 +228,6 @@ static struct cmdline_parts *bdev_parts;
|
||||
static int add_part(int slot, struct cmdline_subpart *subpart,
|
||||
struct parsed_partitions *state)
|
||||
{
|
||||
int label_min;
|
||||
struct partition_meta_info *info;
|
||||
char tmp[sizeof(info->volname) + 4];
|
||||
|
||||
@ -262,9 +239,7 @@ static int add_part(int slot, struct cmdline_subpart *subpart,
|
||||
|
||||
info = &state->parts[slot].info;
|
||||
|
||||
label_min = min_t(int, sizeof(info->volname) - 1,
|
||||
sizeof(subpart->name));
|
||||
strscpy(info->volname, subpart->name, label_min);
|
||||
strscpy(info->volname, subpart->name, sizeof(info->volname));
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
|
||||
strlcat(state->pp_buf, tmp, PAGE_SIZE);
|
||||
|
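
The blkdevparts= parser above is reworked around strsep(), which consumes the separator and advances the cursor in one step, replacing the manual strchr() and length bookkeeping. A standalone sketch of the same splitting scheme (not part of the patch; the input string is invented, and strsep() here is the glibc/BSD userspace function):

/*
 * Illustrative only: splits a blkdevparts=-style string with strsep(),
 * mirroring the ';', ':' and ',' separators used by the parser above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	char *cmdline = strdup("mmcblk0:1M(boot),-(data);mmcblk1:-(all)");
	char *rest = cmdline;
	char *bdev;

	while ((bdev = strsep(&rest, ";"))) {
		char *name = strsep(&bdev, ":");
		char *part;

		printf("device %s\n", name);
		while ((part = strsep(&bdev, ",")))
			printf("  partition definition: %s\n", part);
	}
	free(cmdline);
	return 0;
}
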
@@ -573,10 +573,7 @@ static int blk_add_partitions(struct gendisk *disk)
struct parsed_partitions *state;
int ret = -EAGAIN, p;

if (disk->flags & GENHD_FL_NO_PART)
return 0;

if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
if (!disk_has_partscan(disk))
return 0;

state = check_partition(disk);
@ -29,10 +29,7 @@
|
||||
|
||||
/*
|
||||
* Each block ramdisk device has a xarray brd_pages of pages that stores
|
||||
* the pages containing the block device's contents. A brd page's ->index is
|
||||
* its offset in PAGE_SIZE units. This is similar to, but in no way connected
|
||||
* with, the kernel's pagecache or buffer cache (which sit above our block
|
||||
* device).
|
||||
* the pages containing the block device's contents.
|
||||
*/
|
||||
struct brd_device {
|
||||
int brd_number;
|
||||
@ -51,15 +48,7 @@ struct brd_device {
|
||||
*/
|
||||
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
|
||||
{
|
||||
pgoff_t idx;
|
||||
struct page *page;
|
||||
|
||||
idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
|
||||
page = xa_load(&brd->brd_pages, idx);
|
||||
|
||||
BUG_ON(page && page->index != idx);
|
||||
|
||||
return page;
|
||||
return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -67,8 +56,8 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
|
||||
*/
|
||||
static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
|
||||
{
|
||||
pgoff_t idx;
|
||||
struct page *page, *cur;
|
||||
pgoff_t idx = sector >> PAGE_SECTORS_SHIFT;
|
||||
struct page *page;
|
||||
int ret = 0;
|
||||
|
||||
page = brd_lookup_page(brd, sector);
|
||||
@ -80,23 +69,16 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
|
||||
return -ENOMEM;
|
||||
|
||||
xa_lock(&brd->brd_pages);
|
||||
|
||||
idx = sector >> PAGE_SECTORS_SHIFT;
|
||||
page->index = idx;
|
||||
|
||||
cur = __xa_cmpxchg(&brd->brd_pages, idx, NULL, page, gfp);
|
||||
|
||||
if (unlikely(cur)) {
|
||||
__free_page(page);
|
||||
ret = xa_err(cur);
|
||||
if (!ret && (cur->index != idx))
|
||||
ret = -EIO;
|
||||
} else {
|
||||
ret = __xa_insert(&brd->brd_pages, idx, page, gfp);
|
||||
if (!ret)
|
||||
brd->brd_nr_pages++;
|
||||
}
|
||||
|
||||
xa_unlock(&brd->brd_pages);
|
||||
|
||||
if (ret < 0) {
|
||||
__free_page(page);
|
||||
if (ret == -EBUSY)
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -225,6 +225,10 @@ static unsigned long g_cache_size;
|
||||
module_param_named(cache_size, g_cache_size, ulong, 0444);
|
||||
MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
|
||||
|
||||
static bool g_fua = true;
|
||||
module_param_named(fua, g_fua, bool, 0444);
|
||||
MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
|
||||
|
||||
static unsigned int g_mbps;
|
||||
module_param_named(mbps, g_mbps, uint, 0444);
|
||||
MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
|
||||
@ -253,6 +257,11 @@ static unsigned int g_zone_max_active;
|
||||
module_param_named(zone_max_active, g_zone_max_active, uint, 0444);
|
||||
MODULE_PARM_DESC(zone_max_active, "Maximum number of active zones when block device is zoned. Default: 0 (no limit)");
|
||||
|
||||
static int g_zone_append_max_sectors = INT_MAX;
|
||||
module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444);
|
||||
MODULE_PARM_DESC(zone_append_max_sectors,
|
||||
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
|
||||
|
||||
static struct nullb_device *null_alloc_dev(void);
|
||||
static void null_free_dev(struct nullb_device *dev);
|
||||
static void null_del_dev(struct nullb *nullb);
|
||||
@ -436,10 +445,12 @@ NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(fua, bool, NULL);
|
||||
|
||||
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
|
||||
{
|
||||
@ -580,12 +591,14 @@ static struct configfs_attribute *nullb_device_attrs[] = {
|
||||
&nullb_device_attr_zone_nr_conv,
|
||||
&nullb_device_attr_zone_max_open,
|
||||
&nullb_device_attr_zone_max_active,
|
||||
&nullb_device_attr_zone_append_max_sectors,
|
||||
&nullb_device_attr_zone_readonly,
|
||||
&nullb_device_attr_zone_offline,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_no_sched,
|
||||
&nullb_device_attr_shared_tags,
|
||||
&nullb_device_attr_shared_tag_bitmap,
|
||||
&nullb_device_attr_fua,
|
||||
NULL,
|
||||
};
|
||||
|
||||
@ -664,14 +677,14 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
|
||||
static ssize_t memb_group_features_show(struct config_item *item, char *page)
|
||||
{
|
||||
return snprintf(page, PAGE_SIZE,
|
||||
"badblocks,blocking,blocksize,cache_size,"
|
||||
"badblocks,blocking,blocksize,cache_size,fua,"
|
||||
"completion_nsec,discard,home_node,hw_queue_depth,"
|
||||
"irqmode,max_sectors,mbps,memory_backed,no_sched,"
|
||||
"poll_queues,power,queue_mode,shared_tag_bitmap,"
|
||||
"shared_tags,size,submit_queues,use_per_node_hctx,"
|
||||
"virt_boundary,zoned,zone_capacity,zone_max_active,"
|
||||
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
|
||||
"zone_size\n");
|
||||
"zone_size,zone_append_max_sectors\n");
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_RO(memb_group_, features);
|
||||
@ -751,10 +764,13 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev->zone_nr_conv = g_zone_nr_conv;
|
||||
dev->zone_max_open = g_zone_max_open;
|
||||
dev->zone_max_active = g_zone_max_active;
|
||||
dev->zone_append_max_sectors = g_zone_append_max_sectors;
|
||||
dev->virt_boundary = g_virt_boundary;
|
||||
dev->no_sched = g_no_sched;
|
||||
dev->shared_tags = g_shared_tags;
|
||||
dev->shared_tag_bitmap = g_shared_tag_bitmap;
|
||||
dev->fua = g_fua;
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
@ -1151,7 +1167,7 @@ blk_status_t null_handle_discard(struct nullb_device *dev,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static int null_handle_flush(struct nullb *nullb)
|
||||
static blk_status_t null_handle_flush(struct nullb *nullb)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -1168,7 +1184,7 @@ static int null_handle_flush(struct nullb *nullb)
|
||||
|
||||
WARN_ON(!radix_tree_empty(&nullb->dev->cache));
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
return err;
|
||||
return errno_to_blk_status(err);
|
||||
}
|
||||
|
||||
static int null_transfer(struct nullb *nullb, struct page *page,
|
||||
@ -1206,7 +1222,7 @@ static int null_handle_rq(struct nullb_cmd *cmd)
|
||||
{
|
||||
struct request *rq = blk_mq_rq_from_pdu(cmd);
|
||||
struct nullb *nullb = cmd->nq->dev->nullb;
|
||||
int err;
|
||||
int err = 0;
|
||||
unsigned int len;
|
||||
sector_t sector = blk_rq_pos(rq);
|
||||
struct req_iterator iter;
|
||||
@ -1218,15 +1234,13 @@ static int null_handle_rq(struct nullb_cmd *cmd)
|
||||
err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
|
||||
op_is_write(req_op(rq)), sector,
|
||||
rq->cmd_flags & REQ_FUA);
|
||||
if (err) {
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
return err;
|
||||
}
|
||||
if (err)
|
||||
break;
|
||||
sector += len >> SECTOR_SHIFT;
|
||||
}
|
||||
spin_unlock_irq(&nullb->lock);
|
||||
|
||||
return 0;
|
||||
return errno_to_blk_status(err);
|
||||
}
|
||||
|
||||
static inline blk_status_t null_handle_throttled(struct nullb_cmd *cmd)
|
||||
@ -1273,8 +1287,8 @@ static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd,
|
||||
|
||||
if (op == REQ_OP_DISCARD)
|
||||
return null_handle_discard(dev, sector, nr_sectors);
|
||||
return errno_to_blk_status(null_handle_rq(cmd));
|
||||
|
||||
return null_handle_rq(cmd);
|
||||
}
|
||||
|
||||
static void nullb_zero_read_cmd_buffer(struct nullb_cmd *cmd)
|
||||
@ -1343,7 +1357,7 @@ static void null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
|
||||
blk_status_t sts;
|
||||
|
||||
if (op == REQ_OP_FLUSH) {
|
||||
cmd->error = errno_to_blk_status(null_handle_flush(nullb));
|
||||
cmd->error = null_handle_flush(nullb);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1912,7 +1926,7 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
|
||||
if (dev->cache_size > 0) {
|
||||
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
|
||||
blk_queue_write_cache(nullb->q, true, true);
|
||||
blk_queue_write_cache(nullb->q, true, dev->fua);
|
||||
}
|
||||
|
||||
nullb->q->queuedata = nullb;
|
||||
@ -2113,10 +2127,13 @@ static void __exit null_exit(void)
|
||||
|
||||
if (tag_set.ops)
|
||||
blk_mq_free_tag_set(&tag_set);
|
||||
|
||||
mutex_destroy(&lock);
|
||||
}
|
||||
|
||||
module_init(null_init);
|
||||
module_exit(null_exit);
|
||||
|
||||
MODULE_AUTHOR("Jens Axboe <axboe@kernel.dk>");
|
||||
MODULE_DESCRIPTION("multi queue aware block test driver");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@@ -82,6 +82,7 @@ struct nullb_device {
unsigned int zone_nr_conv; /* number of conventional zones */
unsigned int zone_max_open; /* max number of open zones */
unsigned int zone_max_active; /* max number of active zones */
unsigned int zone_append_max_sectors; /* Max sectors per zone append command */
unsigned int submit_queues; /* number of submission queues */
unsigned int prev_submit_queues; /* number of submission queues before change */
unsigned int poll_queues; /* number of IOPOLL submission queues */

@@ -104,6 +105,7 @@ struct nullb_device {
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
bool shared_tag_bitmap; /* use hostwide shared tags */
bool fua; /* Support FUA */
};

struct nullb {
@ -9,6 +9,8 @@
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "null_blk: " fmt
|
||||
|
||||
#define NULL_ZONE_INVALID_WP ((sector_t)-1)
|
||||
|
||||
static inline sector_t mb_to_sects(unsigned long mb)
|
||||
{
|
||||
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
|
||||
@ -19,18 +21,6 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
|
||||
return sect >> ilog2(dev->zone_size_sects);
|
||||
}
|
||||
|
||||
static inline void null_lock_zone_res(struct nullb_device *dev)
|
||||
{
|
||||
if (dev->need_zone_res_mgmt)
|
||||
spin_lock_irq(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
static inline void null_unlock_zone_res(struct nullb_device *dev)
|
||||
{
|
||||
if (dev->need_zone_res_mgmt)
|
||||
spin_unlock_irq(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
static inline void null_init_zone_lock(struct nullb_device *dev,
|
||||
struct nullb_zone *zone)
|
||||
{
|
||||
@ -103,6 +93,11 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
dev->zone_nr_conv);
|
||||
}
|
||||
|
||||
dev->zone_append_max_sectors =
|
||||
min(ALIGN_DOWN(dev->zone_append_max_sectors,
|
||||
dev->blocksize >> SECTOR_SHIFT),
|
||||
zone_capacity_sects);
|
||||
|
||||
/* Max active zones has to be < nbr of seq zones in order to be enforceable */
|
||||
if (dev->zone_max_active >= dev->nr_zones - dev->zone_nr_conv) {
|
||||
dev->zone_max_active = 0;
|
||||
@ -154,7 +149,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
|
||||
lim->zoned = true;
|
||||
lim->chunk_sectors = dev->zone_size_sects;
|
||||
lim->max_zone_append_sectors = dev->zone_size_sects;
|
||||
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
|
||||
lim->max_open_zones = dev->zone_max_open;
|
||||
lim->max_active_zones = dev->zone_max_active;
|
||||
return 0;
|
||||
@ -163,11 +158,16 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
int null_register_zoned_dev(struct nullb *nullb)
|
||||
{
|
||||
struct request_queue *q = nullb->q;
|
||||
struct gendisk *disk = nullb->disk;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
|
||||
nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
|
||||
return blk_revalidate_disk_zones(nullb->disk, NULL);
|
||||
disk->nr_zones = bdev_nr_zones(disk->part0);
|
||||
|
||||
pr_info("%s: using %s zone append\n",
|
||||
disk->disk_name,
|
||||
queue_emulates_zone_append(q) ? "emulated" : "native");
|
||||
|
||||
return blk_revalidate_disk_zones(disk);
|
||||
}
|
||||
|
||||
void null_free_zoned_dev(struct nullb_device *dev)
|
||||
@ -241,35 +241,6 @@ size_t null_zone_valid_read_len(struct nullb *nullb,
|
||||
return (zone->wp - sector) << SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
static blk_status_t __null_close_zone(struct nullb_device *dev,
|
||||
struct nullb_zone *zone)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
/* close operation on closed is not an error */
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_FULL:
|
||||
default:
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
if (zone->wp == zone->start) {
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
} else {
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
dev->nr_zones_closed++;
|
||||
}
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void null_close_imp_open_zone(struct nullb_device *dev)
|
||||
{
|
||||
struct nullb_zone *zone;
|
||||
@ -286,7 +257,13 @@ static void null_close_imp_open_zone(struct nullb_device *dev)
|
||||
zno = dev->zone_nr_conv;
|
||||
|
||||
if (zone->cond == BLK_ZONE_COND_IMP_OPEN) {
|
||||
__null_close_zone(dev, zone);
|
||||
dev->nr_zones_imp_open--;
|
||||
if (zone->wp == zone->start) {
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
} else {
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
dev->nr_zones_closed++;
|
||||
}
|
||||
dev->imp_close_zone_no = zno;
|
||||
return;
|
||||
}
|
||||
@ -374,73 +351,73 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
|
||||
null_lock_zone(dev, zone);
|
||||
|
||||
if (zone->cond == BLK_ZONE_COND_FULL ||
|
||||
zone->cond == BLK_ZONE_COND_READONLY ||
|
||||
zone->cond == BLK_ZONE_COND_OFFLINE) {
|
||||
/* Cannot write to the zone */
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Regular writes must be at the write pointer position.
|
||||
* Zone append writes are automatically issued at the write
|
||||
* pointer and the position returned using the request or BIO
|
||||
* sector.
|
||||
* Regular writes must be at the write pointer position. Zone append
|
||||
* writes are automatically issued at the write pointer and the position
|
||||
* returned using the request sector. Note that we do not check the zone
|
||||
* condition because for FULL, READONLY and OFFLINE zones, the sector
|
||||
* check against the zone write pointer will always result in failing
|
||||
* the command.
|
||||
*/
|
||||
if (append) {
|
||||
if (WARN_ON_ONCE(!dev->zone_append_max_sectors) ||
|
||||
zone->wp == NULL_ZONE_INVALID_WP) {
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock_zone;
|
||||
}
|
||||
sector = zone->wp;
|
||||
blk_mq_rq_from_pdu(cmd)->__sector = sector;
|
||||
} else if (sector != zone->wp) {
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (zone->wp + nr_sectors > zone->start + zone->capacity) {
|
||||
if (sector != zone->wp ||
|
||||
zone->wp + nr_sectors > zone->start + zone->capacity) {
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
goto unlock_zone;
|
||||
}
|
||||
|
||||
if (zone->cond == BLK_ZONE_COND_CLOSED ||
|
||||
zone->cond == BLK_ZONE_COND_EMPTY) {
|
||||
null_lock_zone_res(dev);
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
null_unlock_zone_res(dev);
|
||||
goto unlock;
|
||||
}
|
||||
if (zone->cond == BLK_ZONE_COND_CLOSED) {
|
||||
dev->nr_zones_closed--;
|
||||
dev->nr_zones_imp_open++;
|
||||
} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
|
||||
dev->nr_zones_imp_open++;
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
goto unlock_zone;
|
||||
}
|
||||
if (zone->cond == BLK_ZONE_COND_CLOSED) {
|
||||
dev->nr_zones_closed--;
|
||||
dev->nr_zones_imp_open++;
|
||||
} else if (zone->cond == BLK_ZONE_COND_EMPTY) {
|
||||
dev->nr_zones_imp_open++;
|
||||
}
|
||||
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
|
||||
null_unlock_zone_res(dev);
|
||||
zone->cond = BLK_ZONE_COND_IMP_OPEN;
|
||||
}
|
||||
|
||||
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
goto unlock_zone;
|
||||
|
||||
zone->wp += nr_sectors;
|
||||
if (zone->wp == zone->start + zone->capacity) {
|
||||
null_lock_zone_res(dev);
|
||||
if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
|
||||
dev->nr_zones_exp_open--;
|
||||
else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
|
||||
dev->nr_zones_imp_open--;
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
if (zone->cond == BLK_ZONE_COND_EXP_OPEN)
|
||||
dev->nr_zones_exp_open--;
|
||||
else if (zone->cond == BLK_ZONE_COND_IMP_OPEN)
|
||||
dev->nr_zones_imp_open--;
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
null_unlock_zone_res(dev);
|
||||
}
|
||||
|
||||
ret = BLK_STS_OK;
|
||||
|
||||
unlock:
|
||||
unlock_zone:
|
||||
null_unlock_zone(dev, zone);
|
||||
|
||||
return ret;
|
||||
@ -454,54 +431,100 @@ static blk_status_t null_open_zone(struct nullb_device *dev,
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
null_lock_zone_res(dev);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
/* open operation on exp open is not an error */
|
||||
goto unlock;
|
||||
/* Open operation on exp open is not an error */
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
default:
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return ret;
|
||||
}
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
dev->nr_zones_exp_open++;
|
||||
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
zone->cond = BLK_ZONE_COND_EXP_OPEN;
|
||||
dev->nr_zones_exp_open++;
|
||||
|
||||
unlock:
|
||||
null_unlock_zone_res(dev);
|
||||
|
||||
return ret;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static blk_status_t null_close_zone(struct nullb_device *dev,
|
||||
struct nullb_zone *zone)
|
||||
{
|
||||
blk_status_t ret;
|
||||
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
null_lock_zone_res(dev);
|
||||
ret = __null_close_zone(dev, zone);
|
||||
null_unlock_zone_res(dev);
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
/* close operation on closed is not an error */
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
break;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_FULL:
|
||||
default:
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
return ret;
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (zone->wp > zone->start)
|
||||
dev->nr_zones_closed++;
|
||||
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
if (zone->wp == zone->start)
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
else
|
||||
zone->cond = BLK_ZONE_COND_CLOSED;
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static blk_status_t null_finish_zone(struct nullb_device *dev,
|
||||
@ -512,41 +535,47 @@ static blk_status_t null_finish_zone(struct nullb_device *dev,
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
null_lock_zone_res(dev);
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_FULL:
|
||||
/* finish operation on full is not an error */
|
||||
goto unlock;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK)
|
||||
goto unlock;
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
default:
|
||||
ret = BLK_STS_IOERR;
|
||||
goto unlock;
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_FULL:
|
||||
/* Finish operation on full is not an error */
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
ret = null_check_zone_resources(dev, zone);
|
||||
if (ret != BLK_STS_OK) {
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return ret;
|
||||
}
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
default:
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = zone->start + zone->len;
|
||||
|
||||
unlock:
|
||||
null_unlock_zone_res(dev);
|
||||
|
||||
return ret;
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static blk_status_t null_reset_zone(struct nullb_device *dev,
|
||||
@ -555,34 +584,33 @@ static blk_status_t null_reset_zone(struct nullb_device *dev,
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
null_lock_zone_res(dev);
|
||||
if (dev->need_zone_res_mgmt) {
|
||||
spin_lock(&dev->zone_res_lock);
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
/* reset operation on empty is not an error */
|
||||
null_unlock_zone_res(dev);
|
||||
return BLK_STS_OK;
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
break;
|
||||
default:
|
||||
null_unlock_zone_res(dev);
|
||||
return BLK_STS_IOERR;
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
dev->nr_zones_imp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
dev->nr_zones_exp_open--;
|
||||
break;
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
dev->nr_zones_closed--;
|
||||
break;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_FULL:
|
||||
break;
|
||||
default:
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
spin_unlock(&dev->zone_res_lock);
|
||||
}
|
||||
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
zone->wp = zone->start;
|
||||
|
||||
null_unlock_zone_res(dev);
|
||||
|
||||
if (dev->memory_backed)
|
||||
return null_handle_discard(dev, zone->start, zone->len);
|
||||
|
||||
@ -711,7 +739,7 @@ static void null_set_zone_cond(struct nullb_device *dev,
|
||||
zone->cond != BLK_ZONE_COND_OFFLINE)
|
||||
null_finish_zone(dev, zone);
|
||||
zone->cond = cond;
|
||||
zone->wp = (sector_t)-1;
|
||||
zone->wp = NULL_ZONE_INVALID_WP;
|
||||
}
|
||||
|
||||
null_unlock_zone(dev, zone);
|
||||
|
@@ -221,7 +221,7 @@ static int ublk_get_nr_zones(const struct ublk_device *ub)

static int ublk_revalidate_disk_zones(struct ublk_device *ub)
{
return blk_revalidate_disk_zones(ub->ub_disk, NULL);
return blk_revalidate_disk_zones(ub->ub_disk);
}

static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)

@@ -249,8 +249,7 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
{
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
blk_queue_required_elevator_features(ub->ub_disk->queue,
ELEVATOR_F_ZBD_SEQ_WRITE);

ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
}
|
@ -1543,7 +1543,7 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
|
||||
err = blk_revalidate_disk_zones(vblk->disk, NULL);
|
||||
err = blk_revalidate_disk_zones(vblk->disk);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
}
|
||||
|
@ -54,7 +54,7 @@ void bch_dump_bucket(struct btree_keys *b)
|
||||
int __bch_count_data(struct btree_keys *b)
|
||||
{
|
||||
unsigned int ret = 0;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k;
|
||||
|
||||
if (b->ops->is_extents)
|
||||
@ -67,7 +67,7 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
struct bkey *k, *p = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
const char *err;
|
||||
|
||||
for_each_key(b, k, &iter) {
|
||||
@ -879,7 +879,7 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
|
||||
unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
|
||||
struct bset *i = bset_tree_last(b)->data;
|
||||
struct bkey *m, *prev = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey preceding_key_on_stack = ZERO_KEY;
|
||||
struct bkey *preceding_key_p = &preceding_key_on_stack;
|
||||
|
||||
@ -895,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
|
||||
else
|
||||
preceding_key(k, &preceding_key_p);
|
||||
|
||||
m = bch_btree_iter_init(b, &iter, preceding_key_p);
|
||||
m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
|
||||
|
||||
if (b->ops->insert_fixup(b, k, &iter, replace_key))
|
||||
if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
|
||||
return status;
|
||||
|
||||
status = BTREE_INSERT_STATUS_INSERT;
|
||||
@ -1100,33 +1100,33 @@ void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
|
||||
btree_iter_cmp));
|
||||
}
|
||||
|
||||
static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
|
||||
struct btree_iter *iter,
|
||||
struct bkey *search,
|
||||
struct bset_tree *start)
|
||||
static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
|
||||
struct btree_iter_stack *iter,
|
||||
struct bkey *search,
|
||||
struct bset_tree *start)
|
||||
{
|
||||
struct bkey *ret = NULL;
|
||||
|
||||
iter->size = ARRAY_SIZE(iter->data);
|
||||
iter->used = 0;
|
||||
iter->iter.size = ARRAY_SIZE(iter->stack_data);
|
||||
iter->iter.used = 0;
|
||||
|
||||
#ifdef CONFIG_BCACHE_DEBUG
|
||||
iter->b = b;
|
||||
iter->iter.b = b;
|
||||
#endif
|
||||
|
||||
for (; start <= bset_tree_last(b); start++) {
|
||||
ret = bch_bset_search(b, start, search);
|
||||
bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
|
||||
bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct bkey *bch_btree_iter_init(struct btree_keys *b,
|
||||
struct btree_iter *iter,
|
||||
struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
|
||||
struct btree_iter_stack *iter,
|
||||
struct bkey *search)
|
||||
{
|
||||
return __bch_btree_iter_init(b, iter, search, b->set);
|
||||
return __bch_btree_iter_stack_init(b, iter, search, b->set);
|
||||
}
|
||||
|
||||
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
|
||||
@ -1293,10 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
|
||||
struct bset_sort_state *state)
|
||||
{
|
||||
size_t order = b->page_order, keys = 0;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
int oldsize = bch_count_data(b);
|
||||
|
||||
__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
|
||||
__bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
|
||||
|
||||
if (start) {
|
||||
unsigned int i;
|
||||
@ -1307,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
|
||||
order = get_order(__set_bytes(b->set->data, keys));
|
||||
}
|
||||
|
||||
__btree_sort(b, &iter, start, order, false, state);
|
||||
__btree_sort(b, &iter.iter, start, order, false, state);
|
||||
|
||||
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
|
||||
}
|
||||
@ -1323,11 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
|
||||
struct bset_sort_state *state)
|
||||
{
|
||||
uint64_t start_time = local_clock();
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
bch_btree_iter_init(b, &iter, NULL);
|
||||
bch_btree_iter_stack_init(b, &iter, NULL);
|
||||
|
||||
btree_mergesort(b, new->set->data, &iter, false, true);
|
||||
btree_mergesort(b, new->set->data, &iter.iter, false, true);
|
||||
|
||||
bch_time_stats_update(&state->time, start_time);
|
||||
|
||||
|
@@ -321,7 +321,14 @@ struct btree_iter {
#endif
struct btree_iter_set {
struct bkey *k, *end;
} data[MAX_BSETS];
} data[];
};

/* Fixed-size btree_iter that can be allocated on the stack */

struct btree_iter_stack {
struct btree_iter iter;
struct btree_iter_set stack_data[MAX_BSETS];
};

typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);

@@ -333,9 +340,9 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,

void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
struct bkey *end);
struct bkey *bch_btree_iter_init(struct btree_keys *b,
struct btree_iter *iter,
struct bkey *search);
struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
struct btree_iter_stack *iter,
struct bkey *search);

struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
const struct bkey *search);

@@ -350,13 +357,14 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
return search ? __bch_bset_search(b, t, search) : t->data->start;
}

#define for_each_key_filter(b, k, iter, filter) \
for (bch_btree_iter_init((b), (iter), NULL); \
((k) = bch_btree_iter_next_filter((iter), (b), filter));)
#define for_each_key_filter(b, k, stack_iter, filter) \
for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
((k) = bch_btree_iter_next_filter(&((stack_iter)->iter), (b), \
filter));)

#define for_each_key(b, k, iter) \
for (bch_btree_iter_init((b), (iter), NULL); \
((k) = bch_btree_iter_next(iter));)
#define for_each_key(b, k, stack_iter) \
for (bch_btree_iter_stack_init((b), (stack_iter), NULL); \
((k) = bch_btree_iter_next(&((stack_iter)->iter)));)

/* Sorting */
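
The change above turns btree_iter's data[] into a flexible array member and adds btree_iter_stack, whose stack_data[] sits immediately after the embedded iterator and provides its storage for on-stack users. A minimal sketch of the same idiom with made-up names, not bcache's (embedding a struct that ends in a flexible array member is a GCC/Clang extension, which is what the kernel relies on here):

/*
 * Illustrative only: a variable-length iterator kept as a flexible
 * array member, plus a wrapper that supplies fixed stack storage.
 */
#include <stdio.h>

#define MAX_SETS 4

struct iter_set {
	int key;
	int end;
};

struct iter {
	unsigned int used;
	unsigned int size;
	struct iter_set data[];		/* flexible array member */
};

struct iter_stack {
	struct iter iter;
	struct iter_set stack_data[MAX_SETS];	/* backs iter.data[] */
};

int main(void)
{
	struct iter_stack s;

	s.iter.used = 0;
	s.iter.size = MAX_SETS;

	/*
	 * Callers pass &s.iter; data[] resolves to the embedded storage
	 * because stack_data[] directly follows the iterator member.
	 */
	s.iter.data[0].key = 42;
	s.iter.used = 1;
	printf("first key: %d\n", s.iter.data[0].key);
	return 0;
}
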
@ -1309,7 +1309,7 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
|
||||
uint8_t stale = 0;
|
||||
unsigned int keys = 0, good_keys = 0;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bset_tree *t;
|
||||
|
||||
gc->nodes++;
|
||||
@ -1570,7 +1570,7 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
|
||||
static unsigned int btree_gc_count_keys(struct btree *b)
|
||||
{
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
unsigned int ret = 0;
|
||||
|
||||
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
|
||||
@ -1611,17 +1611,18 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
|
||||
int ret = 0;
|
||||
bool should_rewrite;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct gc_merge_info r[GC_MERGE_NODES];
|
||||
struct gc_merge_info *i, *last = r + ARRAY_SIZE(r) - 1;
|
||||
|
||||
bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, &b->c->gc_done);
|
||||
|
||||
for (i = r; i < r + ARRAY_SIZE(r); i++)
|
||||
i->b = ERR_PTR(-EINTR);
|
||||
|
||||
while (1) {
|
||||
k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad);
|
||||
if (k) {
|
||||
r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
|
||||
true, b);
|
||||
@ -1911,7 +1912,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
||||
{
|
||||
int ret = 0;
|
||||
struct bkey *k, *p = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid)
|
||||
bch_initial_mark_key(b->c, b->level, k);
|
||||
@ -1919,10 +1920,10 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
||||
bch_initial_mark_key(b->c, b->level + 1, &b->key);
|
||||
|
||||
if (b->level) {
|
||||
bch_btree_iter_init(&b->keys, &iter, NULL);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, NULL);
|
||||
|
||||
do {
|
||||
k = bch_btree_iter_next_filter(&iter, &b->keys,
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad);
|
||||
if (k) {
|
||||
btree_node_prefetch(b, k);
|
||||
@ -1950,7 +1951,7 @@ static int bch_btree_check_thread(void *arg)
|
||||
struct btree_check_info *info = arg;
|
||||
struct btree_check_state *check_state = info->state;
|
||||
struct cache_set *c = check_state->c;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
|
||||
@ -1959,8 +1960,8 @@ static int bch_btree_check_thread(void *arg)
|
||||
ret = 0;
|
||||
|
||||
/* root node keys are checked before thread created */
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
@ -1978,7 +1979,7 @@ static int bch_btree_check_thread(void *arg)
|
||||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
k = bch_btree_iter_next_filter(&iter.iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
@ -2051,7 +2052,7 @@ int bch_btree_check(struct cache_set *c)
|
||||
int ret = 0;
|
||||
int i;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct btree_check_state check_state;
|
||||
|
||||
/* check and mark root node keys */
|
||||
@ -2547,11 +2548,11 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
|
||||
|
||||
if (b->level) {
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
bch_btree_iter_init(&b->keys, &iter, from);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, from);
|
||||
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
|
||||
while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad))) {
|
||||
ret = bcache_btree(map_nodes_recurse, k, b,
|
||||
op, from, fn, flags);
|
||||
@ -2580,11 +2581,12 @@ int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
{
|
||||
int ret = MAP_CONTINUE;
|
||||
struct bkey *k;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
bch_btree_iter_init(&b->keys, &iter, from);
|
||||
bch_btree_iter_stack_init(&b->keys, &iter, from);
|
||||
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
|
||||
while ((k = bch_btree_iter_next_filter(&iter.iter, &b->keys,
|
||||
bch_ptr_bad))) {
|
||||
ret = !b->level
|
||||
? fn(op, b, k)
|
||||
: bcache_btree(map_keys_recurse, k,
|
||||
|
@ -881,8 +881,8 @@ static void bcache_device_free(struct bcache_device *d)
|
||||
bcache_device_detach(d);
|
||||
|
||||
if (disk) {
|
||||
ida_simple_remove(&bcache_device_idx,
|
||||
first_minor_to_idx(disk->first_minor));
|
||||
ida_free(&bcache_device_idx,
|
||||
first_minor_to_idx(disk->first_minor));
|
||||
put_disk(disk);
|
||||
}
|
||||
|
||||
@ -940,8 +940,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
if (!d->full_dirty_stripes)
|
||||
goto out_free_stripe_sectors_dirty;
|
||||
|
||||
idx = ida_simple_get(&bcache_device_idx, 0,
|
||||
BCACHE_DEVICE_IDX_MAX, GFP_KERNEL);
|
||||
idx = ida_alloc_max(&bcache_device_idx, BCACHE_DEVICE_IDX_MAX - 1,
|
||||
GFP_KERNEL);
|
||||
if (idx < 0)
|
||||
goto out_free_full_dirty_stripes;
|
||||
|
||||
@ -986,7 +986,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
out_bioset_exit:
|
||||
bioset_exit(&d->bio_split);
|
||||
out_ida_remove:
|
||||
ida_simple_remove(&bcache_device_idx, idx);
|
||||
ida_free(&bcache_device_idx, idx);
|
||||
out_free_full_dirty_stripes:
|
||||
kvfree(d->full_dirty_stripes);
|
||||
out_free_stripe_sectors_dirty:
|
||||
@ -1914,8 +1914,9 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
|
||||
INIT_LIST_HEAD(&c->btree_cache_freed);
|
||||
INIT_LIST_HEAD(&c->data_buckets);
|
||||
|
||||
iter_size = ((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size + 1) *
|
||||
sizeof(struct btree_iter_set);
|
||||
iter_size = sizeof(struct btree_iter) +
|
||||
((meta_bucket_pages(sb) * PAGE_SECTORS) / sb->block_size) *
|
||||
sizeof(struct btree_iter_set);
|
||||
|
||||
c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL);
|
||||
if (!c->devices)
|
||||
|
@ -660,7 +660,7 @@ static unsigned int bch_root_usage(struct cache_set *c)
|
||||
unsigned int bytes = 0;
|
||||
struct bkey *k;
|
||||
struct btree *b;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
|
||||
goto lock_root;
|
||||
|
||||
|
@ -908,15 +908,15 @@ static int bch_dirty_init_thread(void *arg)
|
||||
struct dirty_init_thrd_info *info = arg;
|
||||
struct bch_dirty_init_state *state = info->state;
|
||||
struct cache_set *c = state->c;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
|
||||
k = p = NULL;
|
||||
prev_idx = 0;
|
||||
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
bch_btree_iter_stack_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter.iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
@ -930,7 +930,7 @@ static int bch_dirty_init_thread(void *arg)
|
||||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
k = bch_btree_iter_next_filter(&iter.iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
@ -979,7 +979,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
int i;
|
||||
struct btree *b = NULL;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_iter_stack iter;
|
||||
struct sectors_dirty_init op;
|
||||
struct cache_set *c = d->c;
|
||||
struct bch_dirty_init_state state;
|
||||
|
@ -321,8 +321,7 @@ static bool __unlock(struct dm_bio_prison_v2 *prison,
|
||||
{
|
||||
BUG_ON(!cell->exclusive_lock);
|
||||
|
||||
bio_list_merge(bios, &cell->bios);
|
||||
bio_list_init(&cell->bios);
|
||||
bio_list_merge_init(bios, &cell->bios);
|
||||
|
||||
if (cell->shared_count) {
|
||||
cell->exclusive_lock = false;
|
||||
|
@ -115,8 +115,7 @@ static void __commit(struct work_struct *_ws)
|
||||
*/
|
||||
spin_lock_irq(&b->lock);
|
||||
list_splice_init(&b->work_items, &work_items);
|
||||
bio_list_merge(&bios, &b->bios);
|
||||
bio_list_init(&b->bios);
|
||||
bio_list_merge_init(&bios, &b->bios);
|
||||
b->commit_scheduled = false;
|
||||
spin_unlock_irq(&b->lock);
|
||||
|
||||
@ -565,8 +564,7 @@ static void defer_bio(struct cache *cache, struct bio *bio)
|
||||
static void defer_bios(struct cache *cache, struct bio_list *bios)
|
||||
{
|
||||
spin_lock_irq(&cache->lock);
|
||||
bio_list_merge(&cache->deferred_bios, bios);
|
||||
bio_list_init(bios);
|
||||
bio_list_merge_init(&cache->deferred_bios, bios);
|
||||
spin_unlock_irq(&cache->lock);
|
||||
|
||||
wake_deferred_bio_worker(cache);
|
||||
@ -1816,8 +1814,7 @@ static void process_deferred_bios(struct work_struct *ws)
|
||||
bio_list_init(&bios);
|
||||
|
||||
spin_lock_irq(&cache->lock);
|
||||
bio_list_merge(&bios, &cache->deferred_bios);
|
||||
bio_list_init(&cache->deferred_bios);
|
||||
bio_list_merge_init(&bios, &cache->deferred_bios);
|
||||
spin_unlock_irq(&cache->lock);
|
||||
|
||||
while ((bio = bio_list_pop(&bios))) {
|
||||
@ -1847,8 +1844,7 @@ static void requeue_deferred_bios(struct cache *cache)
|
||||
struct bio_list bios;
|
||||
|
||||
bio_list_init(&bios);
|
||||
bio_list_merge(&bios, &cache->deferred_bios);
|
||||
bio_list_init(&cache->deferred_bios);
|
||||
bio_list_merge_init(&bios, &cache->deferred_bios);
|
||||
|
||||
while ((bio = bio_list_pop(&bios))) {
|
||||
bio->bi_status = BLK_STS_DM_REQUEUE;
|
||||
|
@ -1181,8 +1181,7 @@ static void process_deferred_discards(struct clone *clone)
|
||||
struct bio_list discards = BIO_EMPTY_LIST;
|
||||
|
||||
spin_lock_irq(&clone->lock);
|
||||
bio_list_merge(&discards, &clone->deferred_discard_bios);
|
||||
bio_list_init(&clone->deferred_discard_bios);
|
||||
bio_list_merge_init(&discards, &clone->deferred_discard_bios);
|
||||
spin_unlock_irq(&clone->lock);
|
||||
|
||||
if (bio_list_empty(&discards))
|
||||
@ -1215,8 +1214,7 @@ static void process_deferred_bios(struct clone *clone)
|
||||
struct bio_list bios = BIO_EMPTY_LIST;
|
||||
|
||||
spin_lock_irq(&clone->lock);
|
||||
bio_list_merge(&bios, &clone->deferred_bios);
|
||||
bio_list_init(&clone->deferred_bios);
|
||||
bio_list_merge_init(&bios, &clone->deferred_bios);
|
||||
spin_unlock_irq(&clone->lock);
|
||||
|
||||
if (bio_list_empty(&bios))
|
||||
@ -1237,11 +1235,9 @@ static void process_deferred_flush_bios(struct clone *clone)
|
||||
* before issuing them or signaling their completion.
|
||||
*/
|
||||
spin_lock_irq(&clone->lock);
|
||||
bio_list_merge(&bios, &clone->deferred_flush_bios);
|
||||
bio_list_init(&clone->deferred_flush_bios);
|
||||
|
||||
bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
|
||||
bio_list_init(&clone->deferred_flush_completions);
|
||||
bio_list_merge_init(&bios, &clone->deferred_flush_bios);
|
||||
bio_list_merge_init(&bio_completions,
|
||||
&clone->deferred_flush_completions);
|
||||
spin_unlock_irq(&clone->lock);
|
||||
|
||||
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
|
||||
|
@ -140,7 +140,7 @@ struct mapped_device {
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
unsigned int nr_zones;
|
||||
unsigned int *zwp_offset;
|
||||
void *zone_revalidate_map;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_IMA
|
||||
|
@ -1272,8 +1272,7 @@ static void process_deferred_bios(struct era *era)
|
||||
bio_list_init(&marked_bios);
|
||||
|
||||
spin_lock(&era->deferred_lock);
|
||||
bio_list_merge(&deferred_bios, &era->deferred_bios);
|
||||
bio_list_init(&era->deferred_bios);
|
||||
bio_list_merge_init(&deferred_bios, &era->deferred_bios);
|
||||
spin_unlock(&era->deferred_lock);
|
||||
|
||||
if (bio_list_empty(&deferred_bios))
|
||||
|
@ -704,8 +704,7 @@ static void process_queued_bios(struct work_struct *work)
|
||||
return;
|
||||
}
|
||||
|
||||
bio_list_merge(&bios, &m->queued_bios);
|
||||
bio_list_init(&m->queued_bios);
|
||||
bio_list_merge_init(&bios, &m->queued_bios);
|
||||
|
||||
spin_unlock_irqrestore(&m->lock, flags);
|
||||
|
||||
|
@ -2042,7 +2042,8 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
r = dm_set_zones_restrictions(t, q);
|
||||
if (r)
|
||||
return r;
|
||||
if (!static_key_enabled(&zoned_enabled.key))
|
||||
if (blk_queue_is_zoned(q) &&
|
||||
!static_key_enabled(&zoned_enabled.key))
|
||||
static_branch_enable(&zoned_enabled);
|
||||
}
|
||||
|
||||
|
@ -592,12 +592,6 @@ struct dm_thin_endio_hook {
|
||||
struct dm_bio_prison_cell *cell;
|
||||
};
|
||||
|
||||
static void __merge_bio_list(struct bio_list *bios, struct bio_list *master)
|
||||
{
|
||||
bio_list_merge(bios, master);
|
||||
bio_list_init(master);
|
||||
}
|
||||
|
||||
static void error_bio_list(struct bio_list *bios, blk_status_t error)
|
||||
{
|
||||
struct bio *bio;
|
||||
@ -616,7 +610,7 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
|
||||
bio_list_init(&bios);
|
||||
|
||||
spin_lock_irq(&tc->lock);
|
||||
__merge_bio_list(&bios, master);
|
||||
bio_list_merge_init(&bios, master);
|
||||
spin_unlock_irq(&tc->lock);
|
||||
|
||||
error_bio_list(&bios, error);
|
||||
@ -645,8 +639,8 @@ static void requeue_io(struct thin_c *tc)
|
||||
bio_list_init(&bios);
|
||||
|
||||
spin_lock_irq(&tc->lock);
|
||||
__merge_bio_list(&bios, &tc->deferred_bio_list);
|
||||
__merge_bio_list(&bios, &tc->retry_on_resume_list);
|
||||
bio_list_merge_init(&bios, &tc->deferred_bio_list);
|
||||
bio_list_merge_init(&bios, &tc->retry_on_resume_list);
|
||||
spin_unlock_irq(&tc->lock);
|
||||
|
||||
error_bio_list(&bios, BLK_STS_DM_REQUEUE);
|
||||
|
@ -604,8 +604,7 @@ static void assign_discard_permit(struct limiter *limiter)
|
||||
|
||||
static void get_waiters(struct limiter *limiter)
|
||||
{
|
||||
bio_list_merge(&limiter->waiters, &limiter->new_waiters);
|
||||
bio_list_init(&limiter->new_waiters);
|
||||
bio_list_merge_init(&limiter->waiters, &limiter->new_waiters);
|
||||
}
|
||||
|
||||
static inline struct data_vio *get_available_data_vio(struct data_vio_pool *pool)
|
||||
|
@ -369,8 +369,7 @@ void vdo_dump_flusher(const struct flusher *flusher)
|
||||
static void initialize_flush(struct vdo_flush *flush, struct vdo *vdo)
|
||||
{
|
||||
bio_list_init(&flush->bios);
|
||||
bio_list_merge(&flush->bios, &vdo->flusher->waiting_flush_bios);
|
||||
bio_list_init(&vdo->flusher->waiting_flush_bios);
|
||||
bio_list_merge_init(&flush->bios, &vdo->flusher->waiting_flush_bios);
|
||||
}
|
||||
|
||||
static void launch_flush(struct vdo_flush *flush)
|
||||
|
@ -60,16 +60,23 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
struct dm_table *map;
|
||||
int srcu_idx, ret;
|
||||
|
||||
if (dm_suspended_md(md))
|
||||
return -EAGAIN;
|
||||
if (!md->zone_revalidate_map) {
|
||||
/* Regular user context */
|
||||
if (dm_suspended_md(md))
|
||||
return -EAGAIN;
|
||||
|
||||
map = dm_get_live_table(md, &srcu_idx);
|
||||
if (!map)
|
||||
return -EIO;
|
||||
map = dm_get_live_table(md, &srcu_idx);
|
||||
if (!map)
|
||||
return -EIO;
|
||||
} else {
|
||||
/* Zone revalidation during __bind() */
|
||||
map = md->zone_revalidate_map;
|
||||
}
|
||||
|
||||
ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);
|
||||
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
if (!md->zone_revalidate_map)
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -138,80 +145,47 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
void dm_cleanup_zoned_dev(struct mapped_device *md)
|
||||
/*
|
||||
* Count conventional zones of a mapped zoned device. If the device
|
||||
* only has conventional zones, do not expose it as zoned.
|
||||
*/
|
||||
static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
if (md->disk) {
|
||||
bitmap_free(md->disk->conv_zones_bitmap);
|
||||
md->disk->conv_zones_bitmap = NULL;
|
||||
bitmap_free(md->disk->seq_zones_wlock);
|
||||
md->disk->seq_zones_wlock = NULL;
|
||||
}
|
||||
unsigned int *nr_conv_zones = data;
|
||||
|
||||
kvfree(md->zwp_offset);
|
||||
md->zwp_offset = NULL;
|
||||
md->nr_zones = 0;
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
(*nr_conv_zones)++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int dm_get_zone_wp_offset(struct blk_zone *zone)
|
||||
static int dm_check_zoned(struct mapped_device *md, struct dm_table *t)
|
||||
{
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zone->wp - zone->start;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
return zone->len;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_NOT_WP:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
default:
|
||||
/*
|
||||
* Conventional, offline and read-only zones do not have a valid
|
||||
* write pointer. Use 0 as for an empty zone.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
struct mapped_device *md = data;
|
||||
struct gendisk *disk = md->disk;
|
||||
unsigned int nr_conv_zones = 0;
|
||||
int ret;
|
||||
|
||||
switch (zone->type) {
|
||||
case BLK_ZONE_TYPE_CONVENTIONAL:
|
||||
if (!disk->conv_zones_bitmap) {
|
||||
disk->conv_zones_bitmap = bitmap_zalloc(disk->nr_zones,
|
||||
GFP_NOIO);
|
||||
if (!disk->conv_zones_bitmap)
|
||||
return -ENOMEM;
|
||||
}
|
||||
set_bit(idx, disk->conv_zones_bitmap);
|
||||
break;
|
||||
case BLK_ZONE_TYPE_SEQWRITE_REQ:
|
||||
case BLK_ZONE_TYPE_SEQWRITE_PREF:
|
||||
if (!disk->seq_zones_wlock) {
|
||||
disk->seq_zones_wlock = bitmap_zalloc(disk->nr_zones,
|
||||
GFP_NOIO);
|
||||
if (!disk->seq_zones_wlock)
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!md->zwp_offset) {
|
||||
md->zwp_offset =
|
||||
kvcalloc(disk->nr_zones, sizeof(unsigned int),
|
||||
GFP_KERNEL);
|
||||
if (!md->zwp_offset)
|
||||
return -ENOMEM;
|
||||
}
|
||||
md->zwp_offset[idx] = dm_get_zone_wp_offset(zone);
|
||||
/* Count conventional zones */
|
||||
md->zone_revalidate_map = t;
|
||||
ret = dm_blk_report_zones(disk, 0, UINT_MAX,
|
||||
dm_check_zoned_cb, &nr_conv_zones);
|
||||
md->zone_revalidate_map = NULL;
|
||||
if (ret < 0) {
|
||||
DMERR("Check zoned failed %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
break;
|
||||
default:
|
||||
DMERR("Invalid zone type 0x%x at sectors %llu",
|
||||
(int)zone->type, zone->start);
|
||||
return -ENODEV;
|
||||
/*
|
||||
* If we only have conventional zones, expose the mapped device as
|
||||
* a regular device.
|
||||
*/
|
||||
if (nr_conv_zones >= ret) {
|
||||
disk->queue->limits.max_open_zones = 0;
|
||||
disk->queue->limits.max_active_zones = 0;
|
||||
disk->queue->limits.zoned = false;
|
||||
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
disk->nr_zones = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -226,41 +200,32 @@ static int dm_zone_revalidate_cb(struct blk_zone *zone, unsigned int idx,
|
||||
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
|
||||
{
|
||||
struct gendisk *disk = md->disk;
|
||||
unsigned int noio_flag;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Check if something changed. If yes, cleanup the current resources
|
||||
* and reallocate everything.
|
||||
*/
|
||||
/* Revalidate only if something changed. */
|
||||
if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
|
||||
dm_cleanup_zoned_dev(md);
|
||||
md->nr_zones = 0;
|
||||
|
||||
if (md->nr_zones)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Scan all zones to initialize everything. Ensure that all vmalloc
|
||||
* operations in this context are done as if GFP_NOIO was specified.
|
||||
* Our table is not live yet. So the call to dm_get_live_table()
|
||||
* in dm_blk_report_zones() will fail. Set a temporary pointer to
|
||||
* our table for dm_blk_report_zones() to use directly.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = dm_blk_do_report_zones(md, t, 0, disk->nr_zones,
|
||||
dm_zone_revalidate_cb, md);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
if (ret != disk->nr_zones) {
|
||||
ret = -EIO;
|
||||
goto err;
|
||||
md->zone_revalidate_map = t;
|
||||
ret = blk_revalidate_disk_zones(disk);
|
||||
md->zone_revalidate_map = NULL;
|
||||
|
||||
if (ret) {
|
||||
DMERR("Revalidate zones failed %d", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
md->nr_zones = disk->nr_zones;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
DMERR("Revalidate zones failed %d", ret);
|
||||
dm_cleanup_zoned_dev(md);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int device_not_zone_append_capable(struct dm_target *ti,
|
||||
@ -289,296 +254,42 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
|
||||
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
|
||||
{
|
||||
struct mapped_device *md = t->md;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* For a zoned target, the number of zones should be updated for the
|
||||
* correct value to be exposed in sysfs queue/nr_zones.
|
||||
* Check if zone append is natively supported, and if not, set the
|
||||
* mapped device queue as needing zone append emulation.
|
||||
*/
|
||||
WARN_ON_ONCE(queue_is_mq(q));
|
||||
md->disk->nr_zones = bdev_nr_zones(md->disk->part0);
|
||||
|
||||
/* Check if zone append is natively supported */
|
||||
if (dm_table_supports_zone_append(t)) {
|
||||
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
dm_cleanup_zoned_dev(md);
|
||||
return 0;
|
||||
} else {
|
||||
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
blk_queue_max_zone_append_sectors(q, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the mapped device as needing zone append emulation and
|
||||
* initialize the emulation resources once the capacity is set.
|
||||
*/
|
||||
set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
if (!get_capacity(md->disk))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Check that the mapped device will indeed be zoned, that is, that it
|
||||
* has sequential write required zones.
|
||||
*/
|
||||
ret = dm_check_zoned(md, t);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!blk_queue_is_zoned(q))
|
||||
return 0;
|
||||
|
||||
if (!md->disk->nr_zones) {
|
||||
DMINFO("%s using %s zone append",
|
||||
md->disk->disk_name,
|
||||
queue_emulates_zone_append(q) ? "emulated" : "native");
|
||||
}
|
||||
|
||||
return dm_revalidate_zones(md, t);
|
||||
}
|
||||
|
||||
static int dm_update_zone_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
unsigned int *wp_offset = data;
|
||||
|
||||
*wp_offset = dm_get_zone_wp_offset(zone);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
|
||||
unsigned int *wp_ofst)
|
||||
{
|
||||
sector_t sector = zno * bdev_zone_sectors(md->disk->part0);
|
||||
unsigned int noio_flag;
|
||||
struct dm_table *t;
|
||||
int srcu_idx, ret;
|
||||
|
||||
t = dm_get_live_table(md, &srcu_idx);
|
||||
if (!t)
|
||||
return -EIO;
|
||||
|
||||
/*
|
||||
* Ensure that all memory allocations in this context are done as if
|
||||
* GFP_NOIO was specified.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = dm_blk_do_report_zones(md, t, sector, 1,
|
||||
dm_update_zone_wp_offset_cb, wp_ofst);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
||||
if (ret != 1)
|
||||
return -EIO;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct orig_bio_details {
|
||||
enum req_op op;
|
||||
unsigned int nr_sectors;
|
||||
};
|
||||
|
||||
/*
|
||||
* First phase of BIO mapping for targets with zone append emulation:
|
||||
* check all BIO that change a zone writer pointer and change zone
|
||||
* append operations into regular write operations.
|
||||
*/
|
||||
static bool dm_zone_map_bio_begin(struct mapped_device *md,
|
||||
unsigned int zno, struct bio *clone)
|
||||
{
|
||||
sector_t zsectors = bdev_zone_sectors(md->disk->part0);
|
||||
unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
|
||||
|
||||
/*
|
||||
* If the target zone is in an error state, recover by inspecting the
|
||||
* zone to get its current write pointer position. Note that since the
|
||||
* target zone is already locked, a BIO issuing context should never
|
||||
* see the zone write in the DM_ZONE_UPDATING_WP_OFST state.
|
||||
*/
|
||||
if (zwp_offset == DM_ZONE_INVALID_WP_OFST) {
|
||||
if (dm_update_zone_wp_offset(md, zno, &zwp_offset))
|
||||
return false;
|
||||
WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
|
||||
}
|
||||
|
||||
switch (bio_op(clone)) {
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
return true;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE:
|
||||
/* Writes must be aligned to the zone write pointer */
|
||||
if ((clone->bi_iter.bi_sector & (zsectors - 1)) != zwp_offset)
|
||||
return false;
|
||||
break;
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
/*
|
||||
* Change zone append operations into a non-mergeable regular
|
||||
* writes directed at the current write pointer position of the
|
||||
* target zone.
|
||||
*/
|
||||
clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
|
||||
(clone->bi_opf & (~REQ_OP_MASK));
|
||||
clone->bi_iter.bi_sector += zwp_offset;
|
||||
break;
|
||||
default:
|
||||
DMWARN_LIMIT("Invalid BIO operation");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Cannot write to a full zone */
|
||||
if (zwp_offset >= zsectors)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Second phase of BIO mapping for targets with zone append emulation:
|
||||
* update the zone write pointer offset array to account for the additional
|
||||
* data written to a zone. Note that at this point, the remapped clone BIO
|
||||
* may already have completed, so we do not touch it.
|
||||
*/
|
||||
static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
|
||||
struct orig_bio_details *orig_bio_details,
|
||||
unsigned int nr_sectors)
|
||||
{
|
||||
unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
|
||||
|
||||
/* The clone BIO may already have been completed and failed */
|
||||
if (zwp_offset == DM_ZONE_INVALID_WP_OFST)
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
/* Update the zone wp offset */
|
||||
switch (orig_bio_details->op) {
|
||||
case REQ_OP_ZONE_RESET:
|
||||
WRITE_ONCE(md->zwp_offset[zno], 0);
|
||||
return BLK_STS_OK;
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
WRITE_ONCE(md->zwp_offset[zno],
|
||||
bdev_zone_sectors(md->disk->part0));
|
||||
return BLK_STS_OK;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE:
|
||||
WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
|
||||
return BLK_STS_OK;
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
/*
|
||||
* Check that the target did not truncate the write operation
|
||||
* emulating a zone append.
|
||||
*/
|
||||
if (nr_sectors != orig_bio_details->nr_sectors) {
|
||||
DMWARN_LIMIT("Truncated write for zone append");
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
WRITE_ONCE(md->zwp_offset[zno], zwp_offset + nr_sectors);
|
||||
return BLK_STS_OK;
|
||||
default:
|
||||
DMWARN_LIMIT("Invalid BIO operation");
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void dm_zone_lock(struct gendisk *disk, unsigned int zno,
|
||||
struct bio *clone)
|
||||
{
|
||||
if (WARN_ON_ONCE(bio_flagged(clone, BIO_ZONE_WRITE_LOCKED)))
|
||||
return;
|
||||
|
||||
wait_on_bit_lock_io(disk->seq_zones_wlock, zno, TASK_UNINTERRUPTIBLE);
|
||||
bio_set_flag(clone, BIO_ZONE_WRITE_LOCKED);
|
||||
}
|
||||
|
||||
static inline void dm_zone_unlock(struct gendisk *disk, unsigned int zno,
|
||||
struct bio *clone)
|
||||
{
|
||||
if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
|
||||
return;
|
||||
|
||||
WARN_ON_ONCE(!test_bit(zno, disk->seq_zones_wlock));
|
||||
clear_bit_unlock(zno, disk->seq_zones_wlock);
|
||||
smp_mb__after_atomic();
|
||||
wake_up_bit(disk->seq_zones_wlock, zno);
|
||||
|
||||
bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
|
||||
}
|
||||
|
||||
static bool dm_need_zone_wp_tracking(struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* Special processing is not needed for operations that do not need the
|
||||
* zone write lock, that is, all operations that target conventional
|
||||
* zones and all operations that do not modify directly a sequential
|
||||
* zone write pointer.
|
||||
*/
|
||||
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
|
||||
return false;
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
return bio_zone_is_seq(bio);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Special IO mapping for targets needing zone append emulation.
|
||||
*/
|
||||
int dm_zone_map_bio(struct dm_target_io *tio)
|
||||
{
|
||||
struct dm_io *io = tio->io;
|
||||
struct dm_target *ti = tio->ti;
|
||||
struct mapped_device *md = io->md;
|
||||
struct bio *clone = &tio->clone;
|
||||
struct orig_bio_details orig_bio_details;
|
||||
unsigned int zno;
|
||||
blk_status_t sts;
|
||||
int r;
|
||||
|
||||
/*
|
||||
* IOs that do not change a zone write pointer do not need
|
||||
* any additional special processing.
|
||||
*/
|
||||
if (!dm_need_zone_wp_tracking(clone))
|
||||
return ti->type->map(ti, clone);
|
||||
|
||||
/* Lock the target zone */
|
||||
zno = bio_zone_no(clone);
|
||||
dm_zone_lock(md->disk, zno, clone);
|
||||
|
||||
orig_bio_details.nr_sectors = bio_sectors(clone);
|
||||
orig_bio_details.op = bio_op(clone);
|
||||
|
||||
/*
|
||||
* Check that the bio and the target zone write pointer offset are
|
||||
* both valid, and if the bio is a zone append, remap it to a write.
|
||||
*/
|
||||
if (!dm_zone_map_bio_begin(md, zno, clone)) {
|
||||
dm_zone_unlock(md->disk, zno, clone);
|
||||
return DM_MAPIO_KILL;
|
||||
}
|
||||
|
||||
/* Let the target do its work */
|
||||
r = ti->type->map(ti, clone);
|
||||
switch (r) {
|
||||
case DM_MAPIO_SUBMITTED:
|
||||
/*
|
||||
* The target submitted the clone BIO. The target zone will
|
||||
* be unlocked on completion of the clone.
|
||||
*/
|
||||
sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
|
||||
*tio->len_ptr);
|
||||
break;
|
||||
case DM_MAPIO_REMAPPED:
|
||||
/*
|
||||
* The target only remapped the clone BIO. In case of error,
|
||||
* unlock the target zone here as the clone will not be
|
||||
* submitted.
|
||||
*/
|
||||
sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
|
||||
*tio->len_ptr);
|
||||
if (sts != BLK_STS_OK)
|
||||
dm_zone_unlock(md->disk, zno, clone);
|
||||
break;
|
||||
case DM_MAPIO_REQUEUE:
|
||||
case DM_MAPIO_KILL:
|
||||
default:
|
||||
dm_zone_unlock(md->disk, zno, clone);
|
||||
sts = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
if (sts != BLK_STS_OK)
|
||||
return DM_MAPIO_KILL;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* IO completion callback called from clone_endio().
|
||||
*/
|
||||
@ -587,61 +298,17 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
|
||||
struct mapped_device *md = io->md;
|
||||
struct gendisk *disk = md->disk;
|
||||
struct bio *orig_bio = io->orig_bio;
|
||||
unsigned int zwp_offset;
|
||||
unsigned int zno;
|
||||
|
||||
/*
|
||||
* For targets that do not emulate zone append, we only need to
|
||||
* handle native zone-append bios.
|
||||
* Get the offset within the zone of the written sector
|
||||
* and add that to the original bio sector position.
|
||||
*/
|
||||
if (!dm_emulate_zone_append(md)) {
|
||||
/*
|
||||
* Get the offset within the zone of the written sector
|
||||
* and add that to the original bio sector position.
|
||||
*/
|
||||
if (clone->bi_status == BLK_STS_OK &&
|
||||
bio_op(clone) == REQ_OP_ZONE_APPEND) {
|
||||
sector_t mask =
|
||||
(sector_t)bdev_zone_sectors(disk->part0) - 1;
|
||||
if (clone->bi_status == BLK_STS_OK &&
|
||||
bio_op(clone) == REQ_OP_ZONE_APPEND) {
|
||||
sector_t mask = bdev_zone_sectors(disk->part0) - 1;
|
||||
|
||||
orig_bio->bi_iter.bi_sector +=
|
||||
clone->bi_iter.bi_sector & mask;
|
||||
}
|
||||
|
||||
return;
|
||||
orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask;
|
||||
}
|
||||
|
||||
/*
|
||||
* For targets that do emulate zone append, if the clone BIO does not
|
||||
* own the target zone write lock, we have nothing to do.
|
||||
*/
|
||||
if (!bio_flagged(clone, BIO_ZONE_WRITE_LOCKED))
|
||||
return;
|
||||
|
||||
zno = bio_zone_no(orig_bio);
|
||||
|
||||
if (clone->bi_status != BLK_STS_OK) {
|
||||
/*
|
||||
* BIOs that modify a zone write pointer may leave the zone
|
||||
* in an unknown state in case of failure (e.g. the write
|
||||
* pointer was only partially advanced). In this case, set
|
||||
* the target zone write pointer as invalid unless it is
|
||||
* already being updated.
|
||||
*/
|
||||
WRITE_ONCE(md->zwp_offset[zno], DM_ZONE_INVALID_WP_OFST);
|
||||
} else if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
|
||||
/*
|
||||
* Get the written sector for zone append operation that were
|
||||
* emulated using regular write operations.
|
||||
*/
|
||||
zwp_offset = READ_ONCE(md->zwp_offset[zno]);
|
||||
if (WARN_ON_ONCE(zwp_offset < bio_sectors(orig_bio)))
|
||||
WRITE_ONCE(md->zwp_offset[zno],
|
||||
DM_ZONE_INVALID_WP_OFST);
|
||||
else
|
||||
orig_bio->bi_iter.bi_sector +=
|
||||
zwp_offset - bio_sectors(orig_bio);
|
||||
}
|
||||
|
||||
dm_zone_unlock(disk, zno, clone);
|
||||
return;
|
||||
}
|
||||
|
@ -1428,25 +1428,12 @@ static void __map_bio(struct bio *clone)
|
||||
down(&md->swap_bios_semaphore);
|
||||
}
|
||||
|
||||
if (static_branch_unlikely(&zoned_enabled)) {
|
||||
/*
|
||||
* Check if the IO needs a special mapping due to zone append
|
||||
* emulation on zoned target. In this case, dm_zone_map_bio()
|
||||
* calls the target map operation.
|
||||
*/
|
||||
if (unlikely(dm_emulate_zone_append(md)))
|
||||
r = dm_zone_map_bio(tio);
|
||||
else
|
||||
goto do_map;
|
||||
} else {
|
||||
do_map:
|
||||
if (likely(ti->type->map == linear_map))
|
||||
r = linear_map(ti, clone);
|
||||
else if (ti->type->map == stripe_map)
|
||||
r = stripe_map(ti, clone);
|
||||
else
|
||||
r = ti->type->map(ti, clone);
|
||||
}
|
||||
if (likely(ti->type->map == linear_map))
|
||||
r = linear_map(ti, clone);
|
||||
else if (ti->type->map == stripe_map)
|
||||
r = stripe_map(ti, clone);
|
||||
else
|
||||
r = ti->type->map(ti, clone);
|
||||
|
||||
switch (r) {
|
||||
case DM_MAPIO_SUBMITTED:
|
||||
@ -1774,6 +1761,33 @@ static void init_clone_info(struct clone_info *ci, struct dm_io *io,
|
||||
ci->sector_count = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
|
||||
struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* For mapped device that need zone append emulation, we must
|
||||
* split any large BIO that straddles zone boundaries.
|
||||
*/
|
||||
return dm_emulate_zone_append(md) && bio_straddles_zones(bio) &&
|
||||
!bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING);
|
||||
}
|
||||
static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
|
||||
{
|
||||
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
|
||||
}
|
||||
#else
|
||||
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
|
||||
struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Entry point to split a bio into clones and submit them to the targets.
|
||||
*/
|
||||
@ -1783,19 +1797,32 @@ static void dm_split_and_process_bio(struct mapped_device *md,
|
||||
struct clone_info ci;
|
||||
struct dm_io *io;
|
||||
blk_status_t error = BLK_STS_OK;
|
||||
bool is_abnormal;
|
||||
bool is_abnormal, need_split;
|
||||
|
||||
is_abnormal = is_abnormal_io(bio);
|
||||
if (unlikely(is_abnormal)) {
|
||||
need_split = is_abnormal = is_abnormal_io(bio);
|
||||
if (static_branch_unlikely(&zoned_enabled))
|
||||
need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
|
||||
|
||||
if (unlikely(need_split)) {
|
||||
/*
|
||||
* Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
|
||||
* otherwise associated queue_limits won't be imposed.
|
||||
* Also split the BIO for mapped devices needing zone append
|
||||
* emulation to ensure that the BIO does not cross zone
|
||||
* boundaries.
|
||||
*/
|
||||
bio = bio_split_to_limits(bio);
|
||||
if (!bio)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the block layer zone write plugging for mapped devices that
|
||||
* need zone append emulation (e.g. dm-crypt).
|
||||
*/
|
||||
if (static_branch_unlikely(&zoned_enabled) && dm_zone_plug_bio(md, bio))
|
||||
return;
|
||||
|
||||
/* Only support nowait for normal IO */
|
||||
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
|
||||
io = alloc_io(md, bio, GFP_NOWAIT);
|
||||
@ -2016,7 +2043,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
|
||||
md->dax_dev = NULL;
|
||||
}
|
||||
|
||||
dm_cleanup_zoned_dev(md);
|
||||
if (md->disk) {
|
||||
spin_lock(&_minor_lock);
|
||||
md->disk->private_data = NULL;
|
||||
|
@ -104,13 +104,11 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
|
||||
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q);
|
||||
void dm_zone_endio(struct dm_io *io, struct bio *clone);
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
void dm_cleanup_zoned_dev(struct mapped_device *md);
|
||||
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
unsigned int nr_zones, report_zones_cb cb, void *data);
|
||||
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
|
||||
int dm_zone_map_bio(struct dm_target_io *io);
|
||||
#else
|
||||
static inline void dm_cleanup_zoned_dev(struct mapped_device *md) {}
|
||||
#define dm_blk_report_zones NULL
|
||||
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
|
||||
{
|
||||
|
@ -1424,7 +1424,7 @@ __acquires(bitmap->lock)
|
||||
sector_t chunk = offset >> bitmap->chunkshift;
|
||||
unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
|
||||
unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
|
||||
sector_t csize;
|
||||
sector_t csize = ((sector_t)1) << bitmap->chunkshift;
|
||||
int err;
|
||||
|
||||
if (page >= bitmap->pages) {
|
||||
@ -1433,6 +1433,7 @@ __acquires(bitmap->lock)
|
||||
* End-of-device while looking for a whole page or
|
||||
* user set a huge number to sysfs bitmap_set_bits.
|
||||
*/
|
||||
*blocks = csize - (offset & (csize - 1));
|
||||
return NULL;
|
||||
}
|
||||
err = md_bitmap_checkpage(bitmap, page, create, 0);
|
||||
@ -1441,8 +1442,7 @@ __acquires(bitmap->lock)
|
||||
bitmap->bp[page].map == NULL)
|
||||
csize = ((sector_t)1) << (bitmap->chunkshift +
|
||||
PAGE_COUNTER_SHIFT);
|
||||
else
|
||||
csize = ((sector_t)1) << bitmap->chunkshift;
|
||||
|
||||
*blocks = csize - (offset & (csize - 1));
|
||||
|
||||
if (err < 0)
|
||||
|
@ -8087,7 +8087,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread)
|
||||
if (t) {
|
||||
pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
|
||||
set_bit(THREAD_WAKEUP, &t->flags);
|
||||
wake_up(&t->wqueue);
|
||||
if (wq_has_sleeper(&t->wqueue))
|
||||
wake_up(&t->wqueue);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
@ -8582,6 +8583,10 @@ static int is_mddev_idle(struct mddev *mddev, int init)
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
struct gendisk *disk = rdev->bdev->bd_disk;
|
||||
|
||||
if (!init && !blk_queue_io_stat(disk->queue))
|
||||
continue;
|
||||
|
||||
curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
|
||||
atomic_read(&disk->sync_io);
|
||||
/* sync IO will cause sync_io to increase before the disk_stats
|
||||
|
@ -621,7 +621,8 @@ extern void mddev_unlock(struct mddev *mddev);
|
||||
|
||||
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
|
||||
{
|
||||
atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
|
||||
if (blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
atomic_add(nr_sectors, &bdev->bd_disk->sync_io);
|
||||
}
|
||||
|
||||
static inline void md_sync_acct_bio(struct bio *bio, unsigned long nr_sectors)
|
||||
|
@ -36,7 +36,6 @@
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/raid/pq.h>
|
||||
#include <linux/async_tx.h>
|
||||
@ -6734,6 +6733,9 @@ static void raid5d(struct md_thread *thread)
|
||||
int batch_size, released;
|
||||
unsigned int offset;
|
||||
|
||||
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
|
||||
break;
|
||||
|
||||
released = release_stripe_list(conf, conf->temp_inactive_list);
|
||||
if (released)
|
||||
clear_bit(R5_DID_ALLOC, &conf->cache_state);
|
||||
@ -6770,18 +6772,7 @@ static void raid5d(struct md_thread *thread)
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_check_recovery(mddev);
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
|
||||
/*
|
||||
* Waiting on MD_SB_CHANGE_PENDING below may deadlock
|
||||
* seeing md_check_recovery() is needed to clear
|
||||
* the flag when using mdmon.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
wait_event_lock_irq(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
|
||||
conf->device_lock);
|
||||
}
|
||||
pr_debug("%d stripes handled\n", handled);
|
||||
|
||||
|
@ -2132,7 +2132,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
|
||||
blk_mq_unfreeze_queue(ns->disk->queue);
|
||||
|
||||
if (blk_queue_is_zoned(ns->queue)) {
|
||||
ret = blk_revalidate_disk_zones(ns->disk, NULL);
|
||||
ret = blk_revalidate_disk_zones(ns->disk);
|
||||
if (ret && !nvme_first_scan(ns->disk))
|
||||
goto out;
|
||||
}
|
||||
|
@ -52,14 +52,10 @@ bool nvmet_bdev_zns_enable(struct nvmet_ns *ns)
|
||||
if (get_capacity(bd_disk) & (bdev_zone_sectors(ns->bdev) - 1))
|
||||
return false;
|
||||
/*
|
||||
* ZNS does not define a conventional zone type. If the underlying
|
||||
* device has a bitmap set indicating the existence of conventional
|
||||
* zones, reject the device. Otherwise, use report zones to detect if
|
||||
* the device has conventional zones.
|
||||
* ZNS does not define a conventional zone type. Use report zones
|
||||
* to detect if the device has conventional zones and reject it if
|
||||
* it does.
|
||||
*/
|
||||
if (ns->bdev->bd_disk->conv_zones_bitmap)
|
||||
return false;
|
||||
|
||||
ret = blkdev_report_zones(ns->bdev, 0, bdev_nr_zones(ns->bdev),
|
||||
validate_conv_zones_cb, NULL);
|
||||
if (ret < 0)
|
||||
|
@ -1869,7 +1869,6 @@ out_put_budget:
|
||||
case BLK_STS_OK:
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_ZONE_RESOURCE:
|
||||
if (scsi_device_blocked(sdev))
|
||||
ret = BLK_STS_DEV_RESOURCE;
|
||||
break;
|
||||
|
@ -1260,12 +1260,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
if (req_op(rq) == REQ_OP_ZONE_APPEND) {
|
||||
ret = sd_zbc_prepare_zone_append(cmd, &lba, nr_blocks);
|
||||
if (ret)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
fua = rq->cmd_flags & REQ_FUA ? 0x8 : 0;
|
||||
dix = scsi_prot_sg_count(cmd);
|
||||
dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
|
||||
@ -1348,7 +1342,6 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
|
||||
return sd_setup_flush_cmnd(cmd);
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
return sd_setup_read_write_cmnd(cmd);
|
||||
case REQ_OP_ZONE_RESET:
|
||||
return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
|
||||
@ -3981,7 +3974,6 @@ static void scsi_disk_release(struct device *dev)
|
||||
struct scsi_disk *sdkp = to_scsi_disk(dev);
|
||||
|
||||
ida_free(&sd_index_ida, sdkp->index);
|
||||
sd_zbc_free_zone_info(sdkp);
|
||||
put_device(&sdkp->device->sdev_gendev);
|
||||
free_opal_dev(sdkp->opal_dev);
|
||||
|
||||
|
@ -104,12 +104,6 @@ struct scsi_disk {
|
||||
* between zone starting LBAs is constant.
|
||||
*/
|
||||
u32 zone_starting_lba_gran;
|
||||
u32 *zones_wp_offset;
|
||||
spinlock_t zones_wp_offset_lock;
|
||||
u32 *rev_wp_offset;
|
||||
struct mutex rev_mutex;
|
||||
struct work_struct zone_wp_offset_work;
|
||||
char *zone_wp_update_buf;
|
||||
#endif
|
||||
atomic_t openers;
|
||||
sector_t capacity; /* size in logical blocks */
|
||||
@ -245,7 +239,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
|
||||
void sd_zbc_free_zone_info(struct scsi_disk *sdkp);
|
||||
int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE]);
|
||||
int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
|
||||
blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
@ -255,13 +248,8 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
|
||||
unsigned int nr_zones, report_zones_cb cb, void *data);
|
||||
|
||||
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
|
||||
unsigned int nr_blocks);
|
||||
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
static inline void sd_zbc_free_zone_info(struct scsi_disk *sdkp) {}
|
||||
|
||||
static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
|
||||
{
|
||||
return 0;
|
||||
@ -285,13 +273,6 @@ static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
|
||||
return good_bytes;
|
||||
}
|
||||
|
||||
static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
|
||||
sector_t *lba,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
return BLK_STS_TARGET;
|
||||
}
|
||||
|
||||
#define sd_zbc_report_zones NULL
|
||||
|
||||
#endif /* CONFIG_BLK_DEV_ZONED */
|
||||
|
@ -23,36 +23,6 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "sd_trace.h"
|
||||
|
||||
/**
|
||||
* sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
|
||||
* @zone: Zone for which to return the write pointer offset.
|
||||
*
|
||||
* Return: offset of the write pointer from the start of the zone.
|
||||
*/
|
||||
static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
|
||||
{
|
||||
if (zone->type == ZBC_ZONE_TYPE_CONV)
|
||||
return 0;
|
||||
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_IMP_OPEN:
|
||||
case BLK_ZONE_COND_EXP_OPEN:
|
||||
case BLK_ZONE_COND_CLOSED:
|
||||
return zone->wp - zone->start;
|
||||
case BLK_ZONE_COND_FULL:
|
||||
return zone->len;
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
default:
|
||||
/*
|
||||
* Offline and read-only zones do not have a valid
|
||||
* write pointer. Use 0 as for an empty zone.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Whether or not a SCSI zone descriptor describes a gap zone. */
|
||||
static bool sd_zbc_is_gap_zone(const u8 buf[64])
|
||||
{
|
||||
@ -121,9 +91,6 @@ static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (sdkp->rev_wp_offset)
|
||||
sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -347,123 +314,6 @@ static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
#define SD_ZBC_INVALID_WP_OFST (~0u)
|
||||
#define SD_ZBC_UPDATING_WP_OFST (SD_ZBC_INVALID_WP_OFST - 1)
|
||||
|
||||
static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
struct scsi_disk *sdkp = data;
|
||||
|
||||
lockdep_assert_held(&sdkp->zones_wp_offset_lock);
|
||||
|
||||
sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* An attempt to append a zone triggered an invalid write pointer error.
|
||||
* Reread the write pointer of the zone(s) in which the append failed.
|
||||
*/
|
||||
static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
|
||||
{
|
||||
struct scsi_disk *sdkp;
|
||||
unsigned long flags;
|
||||
sector_t zno;
|
||||
int ret;
|
||||
|
||||
sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
|
||||
|
||||
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
|
||||
for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
|
||||
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
|
||||
continue;
|
||||
|
||||
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
|
||||
ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
|
||||
SD_BUF_SIZE,
|
||||
zno * sdkp->zone_info.zone_blocks, true);
|
||||
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
|
||||
if (!ret)
|
||||
sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
|
||||
zno, sd_zbc_update_wp_offset_cb,
|
||||
sdkp);
|
||||
}
|
||||
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
|
||||
|
||||
scsi_device_put(sdkp->device);
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
|
||||
* @cmd: the command to setup
|
||||
* @lba: the LBA to patch
|
||||
* @nr_blocks: the number of LBAs to be written
|
||||
*
|
||||
* Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
|
||||
* @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
|
||||
* patching of the lba for an emulated ZONE_APPEND command.
|
||||
*
|
||||
* In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
|
||||
* schedule a REPORT ZONES command and return BLK_STS_IOERR.
|
||||
*/
|
||||
blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
|
||||
unsigned int nr_blocks)
|
||||
{
|
||||
struct request *rq = scsi_cmd_to_rq(cmd);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
|
||||
unsigned int wp_offset, zno = blk_rq_zone_no(rq);
|
||||
unsigned long flags;
|
||||
blk_status_t ret;
|
||||
|
||||
ret = sd_zbc_cmnd_checks(cmd);
|
||||
if (ret != BLK_STS_OK)
|
||||
return ret;
|
||||
|
||||
if (!blk_rq_zone_is_seq(rq))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
/* Unlock of the write lock will happen in sd_zbc_complete() */
|
||||
if (!blk_req_zone_write_trylock(rq))
|
||||
return BLK_STS_ZONE_RESOURCE;
|
||||
|
||||
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
|
||||
wp_offset = sdkp->zones_wp_offset[zno];
|
||||
switch (wp_offset) {
|
||||
case SD_ZBC_INVALID_WP_OFST:
|
||||
/*
|
||||
* We are about to schedule work to update a zone write pointer
|
||||
* offset, which will cause the zone append command to be
|
||||
* requeued. So make sure that the scsi device does not go away
|
||||
* while the work is being processed.
|
||||
*/
|
||||
if (scsi_device_get(sdkp->device)) {
|
||||
ret = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
|
||||
schedule_work(&sdkp->zone_wp_offset_work);
|
||||
fallthrough;
|
||||
case SD_ZBC_UPDATING_WP_OFST:
|
||||
ret = BLK_STS_DEV_RESOURCE;
|
||||
break;
|
||||
default:
|
||||
wp_offset = sectors_to_logical(sdkp->device, wp_offset);
|
||||
if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
|
||||
ret = BLK_STS_IOERR;
|
||||
break;
|
||||
}
|
||||
|
||||
trace_scsi_prepare_zone_append(cmd, *lba, wp_offset);
|
||||
*lba += wp_offset;
|
||||
}
|
||||
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
|
||||
if (ret)
|
||||
blk_req_zone_write_unlock(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
|
||||
* can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
|
||||
@ -504,96 +354,6 @@ blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static bool sd_zbc_need_zone_wp_update(struct request *rq)
|
||||
{
|
||||
switch (req_op(rq)) {
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
case REQ_OP_ZONE_RESET:
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
return true;
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
return blk_rq_zone_is_seq(rq);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
|
||||
* @cmd: Completed command
|
||||
* @good_bytes: Command reply bytes
|
||||
*
|
||||
* Called from sd_zbc_complete() to handle the update of the cached zone write
|
||||
* pointer value in case an update is needed.
|
||||
*/
|
||||
static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
|
||||
unsigned int good_bytes)
|
||||
{
|
||||
int result = cmd->result;
|
||||
struct request *rq = scsi_cmd_to_rq(cmd);
|
||||
struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
|
||||
unsigned int zno = blk_rq_zone_no(rq);
|
||||
enum req_op op = req_op(rq);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If we got an error for a command that needs updating the write
|
||||
* pointer offset cache, we must mark the zone wp offset entry as
|
||||
* invalid to force an update from disk the next time a zone append
|
||||
* command is issued.
|
||||
*/
|
||||
spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
|
||||
|
||||
if (result && op != REQ_OP_ZONE_RESET_ALL) {
|
||||
if (op == REQ_OP_ZONE_APPEND) {
|
||||
/* Force complete completion (no retry) */
|
||||
good_bytes = 0;
|
||||
scsi_set_resid(cmd, blk_rq_bytes(rq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Force an update of the zone write pointer offset on
|
||||
* the next zone append access.
|
||||
*/
|
||||
if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
|
||||
sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
|
||||
goto unlock_wp_offset;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case REQ_OP_ZONE_APPEND:
|
||||
trace_scsi_zone_wp_update(cmd, rq->__sector,
|
||||
sdkp->zones_wp_offset[zno], good_bytes);
|
||||
rq->__sector += sdkp->zones_wp_offset[zno];
|
||||
fallthrough;
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_WRITE:
|
||||
if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
|
||||
sdkp->zones_wp_offset[zno] +=
|
||||
good_bytes >> SECTOR_SHIFT;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET:
|
||||
sdkp->zones_wp_offset[zno] = 0;
|
||||
break;
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
memset(sdkp->zones_wp_offset, 0,
|
||||
sdkp->zone_info.nr_zones * sizeof(unsigned int));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unlock_wp_offset:
|
||||
spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
|
||||
|
||||
return good_bytes;
|
||||
}

/**
 * sd_zbc_complete - ZBC command post processing.
 * @cmd: Completed command
@@ -619,11 +379,7 @@ unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
         * so be quiet about the error.
         */
        rq->rq_flags |= RQF_QUIET;
    } else if (sd_zbc_need_zone_wp_update(rq))
        good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);

    if (req_op(rq) == REQ_OP_ZONE_APPEND)
        blk_req_zone_write_unlock(rq);
    }

    return good_bytes;
}
@@ -780,46 +536,6 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp)
              sdkp->zone_info.zone_blocks);
}

static int sd_zbc_init_disk(struct scsi_disk *sdkp)
{
    sdkp->zones_wp_offset = NULL;
    spin_lock_init(&sdkp->zones_wp_offset_lock);
    sdkp->rev_wp_offset = NULL;
    mutex_init(&sdkp->rev_mutex);
    INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
    sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
    if (!sdkp->zone_wp_update_buf)
        return -ENOMEM;

    return 0;
}

void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
{
    if (!sdkp->zone_wp_update_buf)
        return;

    /* Serialize against revalidate zones */
    mutex_lock(&sdkp->rev_mutex);

    kvfree(sdkp->zones_wp_offset);
    sdkp->zones_wp_offset = NULL;
    kfree(sdkp->zone_wp_update_buf);
    sdkp->zone_wp_update_buf = NULL;

    sdkp->early_zone_info = (struct zoned_disk_info){ };
    sdkp->zone_info = (struct zoned_disk_info){ };

    mutex_unlock(&sdkp->rev_mutex);
}

static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
{
    struct scsi_disk *sdkp = scsi_disk(disk);

    swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
}

/*
 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
 * changed that make it necessary to call that function. Called by
@@ -831,18 +547,8 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
    struct request_queue *q = disk->queue;
    u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
    unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
    int ret = 0;
    unsigned int flags;

    /*
     * For all zoned disks, initialize zone append emulation data if not
     * already done.
     */
    if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
        ret = sd_zbc_init_disk(sdkp);
        if (ret)
            return ret;
    }
    int ret;

    /*
     * There is nothing to do for regular disks, including host-aware disks
@@ -851,50 +557,32 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
    if (!blk_queue_is_zoned(q))
        return 0;

    /*
     * Make sure revalidate zones are serialized to ensure exclusive
     * updates of the scsi disk data.
     */
    mutex_lock(&sdkp->rev_mutex);

    if (sdkp->zone_info.zone_blocks == zone_blocks &&
        sdkp->zone_info.nr_zones == nr_zones &&
        disk->nr_zones == nr_zones)
        goto unlock;
        return 0;

    flags = memalloc_noio_save();
    sdkp->zone_info.zone_blocks = zone_blocks;
    sdkp->zone_info.nr_zones = nr_zones;
    sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
    if (!sdkp->rev_wp_offset) {
        ret = -ENOMEM;
        memalloc_noio_restore(flags);
        goto unlock;
    }

    blk_queue_chunk_sectors(q,
            logical_to_sectors(sdkp->device, zone_blocks));
    blk_queue_max_zone_append_sectors(q,
            q->limits.max_segments << PAGE_SECTORS_SHIFT);

    ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
    /* Enable block layer zone append emulation */
    blk_queue_max_zone_append_sectors(q, 0);

    flags = memalloc_noio_save();
    ret = blk_revalidate_disk_zones(disk);
    memalloc_noio_restore(flags);
    kvfree(sdkp->rev_wp_offset);
    sdkp->rev_wp_offset = NULL;

    if (ret) {
        sdkp->zone_info = (struct zoned_disk_info){ };
        sdkp->capacity = 0;
        goto unlock;
        return ret;
    }

    sd_zbc_print_zones(sdkp);

unlock:
    mutex_unlock(&sdkp->rev_mutex);

    return ret;
    return 0;
}
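The rewritten sd_zbc_revalidate_zones() above illustrates the general pattern for drivers under zone write plugging: advertise a zero zone-append limit so the block layer emulates REQ_OP_ZONE_APPEND, then revalidate the zones. A sketch of what that looks like in a hypothetical zoned driver (the function name and caller are invented; the block layer calls are the ones visible in this diff):

#include <linux/blkdev.h>
#include <linux/sched/mm.h>

static int my_zoned_setup(struct gendisk *disk)
{
    struct request_queue *q = disk->queue;
    unsigned int noio_flags;
    int ret;

    /* A zero max_zone_append_sectors limit requests block layer emulation. */
    blk_queue_max_zone_append_sectors(q, 0);

    /* Zone revalidation allocates memory; avoid recursing into I/O. */
    noio_flags = memalloc_noio_save();
    ret = blk_revalidate_disk_zones(disk);
    memalloc_noio_restore(noio_flags);

    return ret;
}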

/**
@@ -917,10 +605,8 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
    if (!sd_is_zoned(sdkp)) {
        /*
         * Device managed or normal SCSI disk, no special handling
         * required. Nevertheless, free the disk zone information in
         * case the device type changed.
         * required.
         */
        sd_zbc_free_zone_info(sdkp);
        return 0;
    }

@@ -941,7 +627,6 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])

    /* The drive satisfies the kernel restrictions: set it up */
    blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
    blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
    if (sdkp->zones_max_open == U32_MAX)
        disk_set_max_open_zones(disk, 0);
    else

@@ -331,12 +331,11 @@ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
static void merge_rbio(struct btrfs_raid_bio *dest,
                       struct btrfs_raid_bio *victim)
{
    bio_list_merge(&dest->bio_list, &victim->bio_list);
    bio_list_merge_init(&dest->bio_list, &victim->bio_list);
    dest->bio_list_bytes += victim->bio_list_bytes;
    /* Also inherit the bitmaps from @victim. */
    bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
              dest->stripe_nsectors);
    bio_list_init(&victim->bio_list);
}

/*

@@ -615,6 +615,13 @@ static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
    bl->tail = bl2->tail;
}

static inline void bio_list_merge_init(struct bio_list *bl,
        struct bio_list *bl2)
{
    bio_list_merge(bl, bl2);
    bio_list_init(bl2);
}
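The new bio_list_merge_init() helper added above folds the common merge-then-reinitialize pattern, seen in merge_rbio() earlier in this diff, into a single call. A small hypothetical user, assuming only the usual bio_list API:

#include <linux/bio.h>

/*
 * Hypothetical helper: take over every bio queued on @pending and complete
 * it. bio_list_merge_init() leaves @pending empty, so the caller can keep
 * using it without calling bio_list_init() again.
 */
static void flush_pending(struct bio_list *pending)
{
    struct bio_list done = BIO_EMPTY_LIST;
    struct bio *bio;

    bio_list_merge_init(&done, pending);
    while ((bio = bio_list_pop(&done)))
        bio_endio(bio);
}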

static inline void bio_list_merge_head(struct bio_list *bl,
        struct bio_list *bl2)
{
@@ -824,5 +831,9 @@ static inline void bio_clear_polled(struct bio *bio)

struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
        unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);

struct bio *blk_alloc_discard_bio(struct block_device *bdev,
        sector_t *sector, sector_t *nr_sects, gfp_t gfp_mask);

#endif /* __LINUX_BIO_H */
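bio_chain_and_submit() and blk_alloc_discard_bio() are the new building blocks the discard path is restructured around. A hypothetical caller might combine them as below, under the assumption that blk_alloc_discard_bio() clamps each bio to the device's discard limits, advances @sector/@nr_sects, and returns NULL once the range is consumed, and that bio_chain_and_submit() submits @prev chained ahead of @new and returns @new:

#include <linux/bio.h>
#include <linux/blkdev.h>

static int discard_range(struct block_device *bdev, sector_t sector,
                         sector_t nr_sects)
{
    struct bio *prev = NULL, *bio;
    int ret = 0;

    while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
                                        GFP_KERNEL)))
        prev = bio_chain_and_submit(prev, bio);

    if (prev) {
        ret = submit_bio_wait(prev);    /* waits for the whole chain */
        bio_put(prev);
    }
    return ret;
}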

@@ -54,8 +54,8 @@ typedef __u32 __bitwise req_flags_t;
/* Look at ->special_vec for the actual data payload instead of the
   bio chain. */
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
/* The per-zone write lock is held for this request */
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* The request completion needs to be signaled to zone write plugging. */
#define RQF_ZONE_WRITE_PLUGGING ((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
#define RQF_RESV ((__force req_flags_t)(1 << 23))
@@ -1150,85 +1150,4 @@ static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
}
void blk_dump_rq_flags(struct request *, char *);

#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_rq_zone_no(struct request *rq)
{
    return disk_zone_no(rq->q->disk, blk_rq_pos(rq));
}

static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
{
    return disk_zone_is_seq(rq->q->disk, blk_rq_pos(rq));
}

/**
 * blk_rq_is_seq_zoned_write() - Check if @rq requires write serialization.
 * @rq: Request to examine.
 *
 * Note: REQ_OP_ZONE_APPEND requests do not require serialization.
 */
static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
{
    return op_needs_zoned_write_locking(req_op(rq)) &&
           blk_rq_zone_is_seq(rq);
}

bool blk_req_needs_zone_write_lock(struct request *rq);
bool blk_req_zone_write_trylock(struct request *rq);
void __blk_req_zone_write_lock(struct request *rq);
void __blk_req_zone_write_unlock(struct request *rq);

static inline void blk_req_zone_write_lock(struct request *rq)
{
    if (blk_req_needs_zone_write_lock(rq))
        __blk_req_zone_write_lock(rq);
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
    if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
        __blk_req_zone_write_unlock(rq);
}

static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
    return rq->q->disk->seq_zones_wlock &&
           test_bit(blk_rq_zone_no(rq), rq->q->disk->seq_zones_wlock);
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
    if (!blk_req_needs_zone_write_lock(rq))
        return true;
    return !blk_req_zone_is_write_locked(rq);
}
#else /* CONFIG_BLK_DEV_ZONED */
static inline bool blk_rq_is_seq_zoned_write(struct request *rq)
{
    return false;
}

static inline bool blk_req_needs_zone_write_lock(struct request *rq)
{
    return false;
}

static inline void blk_req_zone_write_lock(struct request *rq)
{
}

static inline void blk_req_zone_write_unlock(struct request *rq)
{
}
static inline bool blk_req_zone_is_write_locked(struct request *rq)
{
    return false;
}

static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
{
    return true;
}
#endif /* CONFIG_BLK_DEV_ZONED */

#endif /* BLK_MQ_H */

@@ -130,18 +130,6 @@ typedef u16 blk_short_t;
 */
#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. Examples of that are zones that are write-locked, but a read
 * to the same zone could be served.
 */
#define BLK_STS_ZONE_RESOURCE ((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
@@ -149,7 +137,7 @@ typedef u16 blk_short_t;
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)15)
#define BLK_STS_ZONE_OPEN_RESOURCE ((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
@@ -158,20 +146,20 @@ typedef u16 blk_short_t;
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)16)
#define BLK_STS_ZONE_ACTIVE_RESOURCE ((__force blk_status_t)15)

/*
 * BLK_STS_OFFLINE is returned from the driver when the target device is offline
 * or is being taken offline. This could help differentiate the case where a
 * device is intentionally being shut down from a real I/O error.
 */
#define BLK_STS_OFFLINE ((__force blk_status_t)17)
#define BLK_STS_OFFLINE ((__force blk_status_t)16)

/*
 * BLK_STS_DURATION_LIMIT is returned from the driver when the target device
 * aborted the command because it exceeded one of its Command Duration Limits.
 */
#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)18)
#define BLK_STS_DURATION_LIMIT ((__force blk_status_t)17)

/**
 * blk_path_error - returns true if error may be path related
@@ -228,7 +216,12 @@ struct bio {

    struct bvec_iter bi_iter;

    blk_qc_t bi_cookie;
    union {
        /* for polled bios: */
        blk_qc_t bi_cookie;
        /* for plugged zoned writes only: */
        unsigned int __bi_nr_segments;
    };
    bio_end_io_t *bi_end_io;
    void *bi_private;
#ifdef CONFIG_BLK_CGROUP
@@ -298,7 +291,8 @@ enum {
    BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
    BIO_QOS_MERGED, /* but went through rq_qos merge path */
    BIO_REMAPPED,
    BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
    BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */
    BIO_EMULATES_ZONE_APPEND, /* bio emulates a zone append operation */
    BIO_FLAG_LAST
};

@@ -179,22 +179,21 @@ struct gendisk {

#ifdef CONFIG_BLK_DEV_ZONED
    /*
     * Zoned block device information for request dispatch control.
     * nr_zones is the total number of zones of the device. This is always
     * 0 for regular block devices. conv_zones_bitmap is a bitmap of nr_zones
     * bits which indicates if a zone is conventional (bit set) or
     * sequential (bit clear). seq_zones_wlock is a bitmap of nr_zones
     * bits which indicates if a zone is write locked, that is, if a write
     * request targeting the zone was dispatched.
     *
     * Reads of this information must be protected with blk_queue_enter() /
     * blk_queue_exit(). Modifying this information is only allowed while
     * no requests are being processed. See also blk_mq_freeze_queue() and
     * blk_mq_unfreeze_queue().
     * Zoned block device information. Reads of this information must be
     * protected with blk_queue_enter() / blk_queue_exit(). Modifying this
     * information is only allowed while no requests are being processed.
     * See also blk_mq_freeze_queue() and blk_mq_unfreeze_queue().
     */
    unsigned int nr_zones;
    unsigned int zone_capacity;
    unsigned long *conv_zones_bitmap;
    unsigned long *seq_zones_wlock;
    unsigned int zone_wplugs_hash_bits;
    spinlock_t zone_wplugs_lock;
    struct mempool_s *zone_wplugs_pool;
    struct hlist_head *zone_wplugs_hash;
    struct list_head zone_wplugs_err_list;
    struct work_struct zone_wplugs_work;
    struct workqueue_struct *zone_wplugs_wq;
#endif /* CONFIG_BLK_DEV_ZONED */

#if IS_ENABLED(CONFIG_CDROM)
@@ -233,6 +232,19 @@ static inline unsigned int disk_openers(struct gendisk *disk)
    return atomic_read(&disk->part0->bd_openers);
}

/**
 * disk_has_partscan - return %true if partition scanning is enabled on a disk
 * @disk: disk to check
 *
 * Returns %true if partition scanning is enabled for @disk, or %false if
 * partition scanning is disabled either permanently or temporarily.
 */
static inline bool disk_has_partscan(struct gendisk *disk)
{
    return !(disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN)) &&
        !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
}
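For illustration, a caller of the new disk_has_partscan() helper could use it to short-circuit a rescan. The function below is hypothetical, not part of this series:

#include <linux/blkdev.h>

/*
 * Hypothetical caller: bail out of a partition rescan when scanning is
 * disabled for the disk, whether permanently (GENHD_FL_NO_PART,
 * GENHD_FL_HIDDEN) or temporarily (GD_SUPPRESS_PART_SCAN).
 */
static int my_rescan_partitions(struct gendisk *disk)
{
    if (!disk_has_partscan(disk))
        return -EINVAL;

    /* ... read the partition table and register the partitions ... */
    return 0;
}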

/*
 * The gendisk is refcounted by the part0 block_device, and the bd_device
 * therein is also used for device model presentation in sysfs.
@@ -331,8 +343,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
        unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
        sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk,
        void (*update_driver_data)(struct gendisk *disk));
int blk_revalidate_disk_zones(struct gendisk *disk);

/*
 * Independent access ranges: struct blk_independent_access_range describes
@@ -449,8 +460,6 @@ struct request_queue {

    atomic_t nr_active_requests_shared_tags;

    unsigned int required_elevator_features;

    struct blk_mq_tags *sched_shared_tags;

    struct list_head icq_list;
@@ -633,15 +642,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
    return sector >> ilog2(disk->queue->limits.chunk_sectors);
}

static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
{
    if (!blk_queue_is_zoned(disk->queue))
        return false;
    if (!disk->conv_zones_bitmap)
        return true;
    return !test_bit(disk_zone_no(disk, sector), disk->conv_zones_bitmap);
}

static inline void disk_set_max_open_zones(struct gendisk *disk,
        unsigned int max_open_zones)
{
@@ -664,6 +664,7 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
    return bdev->bd_disk->queue->limits.max_active_zones;
}

bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs);
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int bdev_nr_zones(struct block_device *bdev)
{
@@ -674,10 +675,6 @@ static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
    return 0;
}
static inline bool disk_zone_is_seq(struct gendisk *disk, sector_t sector)
{
    return false;
}
static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector)
{
    return 0;
@@ -691,6 +688,10 @@ static inline unsigned int bdev_max_active_zones(struct block_device *bdev)
{
    return 0;
}
static inline bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs)
{
    return false;
}
#endif /* CONFIG_BLK_DEV_ZONED */
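blk_zone_plug_bio() is the entry point of the new zone write plugging machinery declared above (with a stub returning false when CONFIG_BLK_DEV_ZONED is off). A hypothetical submission-path fragment, under the assumption that a true return value means the bio was queued in a zone write plug and will be resubmitted by the block layer later:

#include <linux/blkdev.h>

static void my_submit_bio(struct bio *bio, unsigned int nr_segs)
{
    if (blk_zone_plug_bio(bio, nr_segs))
        return;     /* plugged; the block layer owns the bio for now */

    /* ... normal issue path for @bio ... */
}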

static inline unsigned int blk_queue_depth(struct request_queue *q)
@@ -855,9 +856,11 @@ static inline unsigned int bio_zone_no(struct bio *bio)
    return disk_zone_no(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
}

static inline unsigned int bio_zone_is_seq(struct bio *bio)
static inline bool bio_straddles_zones(struct bio *bio)
{
    return disk_zone_is_seq(bio->bi_bdev->bd_disk, bio->bi_iter.bi_sector);
    return bio_sectors(bio) &&
        bio_zone_no(bio) !=
        disk_zone_no(bio->bi_bdev->bd_disk, bio_end_sector(bio) - 1);
}

/*
@@ -942,14 +945,6 @@ disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
void disk_set_independent_access_ranges(struct gendisk *disk,
        struct blk_independent_access_ranges *iars);

/*
 * Elevator features for blk_queue_required_elevator_features:
 */
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)

extern void blk_queue_required_elevator_features(struct request_queue *q,
        unsigned int features);
extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
        struct device *dev);

@@ -1156,12 +1151,29 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
    return q->limits.max_segment_size;
}

static inline unsigned int queue_max_zone_append_sectors(const struct request_queue *q)
static inline unsigned int queue_limits_max_zone_append_sectors(struct queue_limits *l)
{
    unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);

    const struct queue_limits *l = &q->limits;
    return min_not_zero(l->max_zone_append_sectors, max_sectors);
}

    return min(l->max_zone_append_sectors, l->max_sectors);
static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
{
    if (!blk_queue_is_zoned(q))
        return 0;

    return queue_limits_max_zone_append_sectors(&q->limits);
}

static inline bool queue_emulates_zone_append(struct request_queue *q)
{
    return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
}

static inline bool bdev_emulates_zone_append(struct block_device *bdev)
{
    return queue_emulates_zone_append(bdev_get_queue(bdev));
}
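The two emulation helpers just added encode the convention used throughout this series: on a zoned queue, a zero max_zone_append_sectors limit means zone append is emulated rather than issued natively. A hypothetical probe-time check built on them (the function and message are assumptions, not kernel code):

#include <linux/blkdev.h>
#include <linux/printk.h>

static void report_zone_append_mode(struct block_device *bdev)
{
    if (!bdev_is_zoned(bdev))
        return;

    pr_info("%pg: zone append is %s\n", bdev,
            bdev_emulates_zone_append(bdev) ? "emulated" : "native");
}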

static inline unsigned int
@@ -1303,18 +1315,6 @@ static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec)
    return disk_zone_no(bdev->bd_disk, sec);
}

/* Whether write serialization is required for @op on zoned devices. */
static inline bool op_needs_zoned_write_locking(enum req_op op)
{
    return op == REQ_OP_WRITE || op == REQ_OP_WRITE_ZEROES;
}

static inline bool bdev_op_is_zoned_write(struct block_device *bdev,
        enum req_op op)
{
    return bdev_is_zoned(bdev) && op_needs_zoned_write_locking(op);
}

static inline sector_t bdev_zone_sectors(struct block_device *bdev)
{
    struct request_queue *q = bdev_get_queue(bdev);
@@ -1330,6 +1330,12 @@ static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
    return sector & (bdev_zone_sectors(bdev) - 1);
}

static inline sector_t bio_offset_from_zone_start(struct bio *bio)
{
    return bdev_offset_from_zone_start(bio->bi_bdev,
            bio->bi_iter.bi_sector);
}

static inline bool bdev_is_zone_start(struct block_device *bdev,
        sector_t sector)
{

@@ -494,18 +494,18 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
        struct sbitmap_word *map = &sb->map[index];
        unsigned long get_mask;
        unsigned int map_depth = __map_depth(sb, index);
        unsigned long val;

        sbitmap_deferred_clear(map);
        if (map->word == (1UL << (map_depth - 1)) - 1)
        val = READ_ONCE(map->word);
        if (val == (1UL << (map_depth - 1)) - 1)
            goto next;

        nr = find_first_zero_bit(&map->word, map_depth);
        nr = find_first_zero_bit(&val, map_depth);
        if (nr + nr_tags <= map_depth) {
            atomic_long_t *ptr = (atomic_long_t *) &map->word;
            unsigned long val;

            get_mask = ((1UL << nr_tags) - 1) << nr;
            val = READ_ONCE(map->word);
            while (!atomic_long_try_cmpxchg(ptr, &val,
                        get_mask | val))
                ;