for-6.11/block-20240710
-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmaOTd8QHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgppqIEACUr8Vv2FtezvT3OfVSlYWHHLXzkRhwEG5s
vdk0o7Ow6U54sMjfymbHTgLD0ZOJf3uJ6BI95FQuW41jPzDFVbx4Hy8QzqonMkw9
1D/YQ4zrVL2mOKBzATbKpoGJzMOzGeoXEueFZ1AYPAX7RrDtP4xPQNfrcfkdE2zF
LycJN70Vp6lrZZMuI9yb9ts1tf7TFzK0HJANxOAKTgSiPmBmxesjkJlhrdUrgkAU
qDVyjj7u/ssndBJAb9i6Bl95Do8s9t4DeJq5/6wgKqtf5hClMXzPVB8Wy084gr6E
rTRsCEhOug3qEZSqfAgAxnd3XFRNc/p2KMUe5YZ4mAqux4hpSmIQQDM/5X5K9vEv
f4MNqUGlqyqntZx+KPyFpf7kLHFYS1qK4ub0FojWJEY4GrbBPNjjncLJ9+ozR0c8
kNDaFjMNAjalBee1FxNNH8LdVcd28rrCkPxRLEfO/gvBMUmvJf4ZyKmSED0v5DhY
vZqKlBqG+wg0EXvdiWEHMDh9Y+q/2XBIkS6NN/Bhh61HNu+XzC838ts1X7lR+4o2
AM5Vapw+v0q6kFBMRP3IcJI/c0UcIU8EQU7axMyzWtvhog8kx8x01hIj1L4UyYYr
rUdWrkugBVXJbywFuH/QIJxWxS/z4JdSw5VjASJLIrXy+aANmmG9Wonv95eyhpUv
5iv+EdRSNA==
=wVi8
-----END PGP SIGNATURE-----

Merge tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe updates via Keith:
     - Device initialization memory leak fixes (Keith)
     - More constants defined (Weiwen)
     - Target debugfs support (Hannes)
     - PCIe subsystem reset enhancements (Keith)
     - Queue-depth multipath policy (Redhat and PureStorage)
     - Implement get_unique_id (Christoph)
     - Authentication error fixes (Gaosheng)

 - MD updates via Song:
     - sync_action fix and refactoring (Yu Kuai)
     - Various small fixes (Christoph Hellwig, Li Nan, and Ofir Gal, Yu
       Kuai, Benjamin Marzinski, Christophe JAILLET, Yang Li)

 - Fix loop detach/open race (Gulam)

 - Fix lower control limit for blk-throttle (Yu)

 - Add module descriptions to various drivers (Jeff)

 - Add support for atomic writes for block devices, and statx reporting
   for same. Includes SCSI and NVMe (John, Prasad, Alan)

 - Add IO priority information to block trace points (Dongliang)

 - Various zone improvements and tweaks (Damien)

 - mq-deadline tag reservation improvements (Bart)

 - Ignore direct reclaim swap writes in writeback throttling (Baokun)

 - Block integrity improvements and fixes (Anuj)

 - Add basic support for rust based block drivers. Has a dummy null_blk
   variant for now (Andreas)

 - Series converting driver settings to queue limits, and cleanups and
   fixes related to that (Christoph)

 - Cleanup for poking too deeply into the bvec internals, in preparation
   for DMA mapping API changes (Christoph)

 - Various minor tweaks and fixes (Jiapeng, John, Kanchan, Mikulas, Ming,
   Zhu, Damien, Christophe, Chaitanya)

* tag 'for-6.11/block-20240710' of git://git.kernel.dk/linux: (206 commits)
  floppy: add missing MODULE_DESCRIPTION() macro
  loop: add missing MODULE_DESCRIPTION() macro
  ublk_drv: add missing MODULE_DESCRIPTION() macro
  xen/blkback: add missing MODULE_DESCRIPTION() macro
  block/rnbd: Constify struct kobj_type
  block: take offset into account in blk_bvec_map_sg again
  block: fix get_max_segment_size() warning
  loop: Don't bother validating blocksize
  virtio_blk: Don't bother validating blocksize
  null_blk: Don't bother validating blocksize
  block: Validate logical block size in blk_validate_limits()
  virtio_blk: Fix default logical block size fallback
  nvmet-auth: fix nvmet_auth hash error handling
  nvme: implement ->get_unique_id
  block: pass a phys_addr_t to get_max_segment_size
  block: add a bvec_phys helper
  blk-lib: check for kill signal in ioctl BLKZEROOUT
  block: limit the Write Zeroes to manually writing zeroes fallback
  block: refacto blkdev_issue_zeroout
  block: move read-only and supported checks into (__)blkdev_issue_zeroout
  ...
This commit is contained in:
commit 3e78198862

.mailmap (1 line changed):
@@ -690,6 +690,7 @@ Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
Weiwen Hu <huweiwen@linux.alibaba.com> <sehuww@mail.scut.edu.cn>
WeiXiong Liao <gmpy.liaowx@gmail.com> <liaoweixiong@allwinnertech.com>
Wen Gong <quic_wgong@quicinc.com> <wgong@codeaurora.org>
Wesley Cheng <quic_wcheng@quicinc.com> <wcheng@codeaurora.org>
@@ -21,6 +21,59 @@ Description:
		device is offset from the internal allocation unit's
		natural alignment.

What:		/sys/block/<disk>/atomic_write_max_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter specifies the maximum atomic write
		size reported by the device. This parameter is relevant
		for merging of writes, where a merged atomic write
		operation must not exceed this number of bytes.
		This parameter may be greater than the value in
		atomic_write_unit_max_bytes as
		atomic_write_unit_max_bytes will be rounded down to a
		power-of-two and atomic_write_unit_max_bytes may also be
		limited by some other queue limits, such as max_segments.
		This parameter - along with atomic_write_unit_min_bytes
		and atomic_write_unit_max_bytes - will not be larger than
		max_hw_sectors_kb, but may be larger than max_sectors_kb.


What:		/sys/block/<disk>/atomic_write_unit_min_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter specifies the smallest block which can
		be written atomically with an atomic write operation. All
		atomic write operations must begin at a
		atomic_write_unit_min boundary and must be multiples of
		atomic_write_unit_min. This value must be a power-of-two.


What:		/sys/block/<disk>/atomic_write_unit_max_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] This parameter defines the largest block which can be
		written atomically with an atomic write operation. This
		value must be a multiple of atomic_write_unit_min and must
		be a power-of-two. This value will not be larger than
		atomic_write_max_bytes.


What:		/sys/block/<disk>/atomic_write_boundary_bytes
Date:		February 2024
Contact:	Himanshu Madhani <himanshu.madhani@oracle.com>
Description:
		[RO] A device may need to internally split an atomic write I/O
		which straddles a given logical block address boundary. This
		parameter specifies the size in bytes of the atomic boundary if
		one is reported by the device. This value must be a
		power-of-two and at least the size as in
		atomic_write_unit_max_bytes.
		Any attempt to merge atomic write I/Os must not result in a
		merged I/O which crosses this boundary (if any).


What:		/sys/block/<disk>/diskseq
Date:		February 2021
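The same limits are also surfaced to applications through statx() in this series (see the bdev_statx() hunk further down). A minimal userspace sketch, assuming 6.11-era glibc/uapi headers that provide STATX_WRITE_ATOMIC and the stx_atomic_write_unit_* fields; the default device path is illustrative:

/*
 * Sketch only: query the atomic write geometry that the sysfs files above
 * describe, via the new STATX_WRITE_ATOMIC mask.  Requires headers that
 * already carry the 6.11 statx additions.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	const char *dev = argc > 1 ? argv[1] : "/dev/nvme0n1"; /* illustrative */
	struct statx stx;

	if (statx(AT_FDCWD, dev, 0, STATX_WRITE_ATOMIC | STATX_DIOALIGN, &stx)) {
		perror("statx");
		return 1;
	}
	printf("%s: atomic write unit min %u max %u, dio offset align %u\n",
	       dev, stx.stx_atomic_write_unit_min,
	       stx.stx_atomic_write_unit_max, stx.stx_dio_offset_align);
	return 0;
}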
@@ -153,18 +153,11 @@ bio_free() will automatically free the bip.
4.2 Block Device
----------------

Because the format of the protection data is tied to the physical
disk, each block device has been extended with a block integrity
profile (struct blk_integrity). This optional profile is registered
with the block layer using blk_integrity_register().

The profile contains callback functions for generating and verifying
the protection data, as well as getting and setting application tags.
The profile also contains a few constants to aid in completing,
merging and splitting the integrity metadata.
Block devices can set up the integrity information in the integrity
sub-struture of the queue_limits structure.

Layered block devices will need to pick a profile that's appropriate
for all subdevices. blk_integrity_compare() can help with that. DM
for all subdevices. queue_limits_stack_integrity() can help with that. DM
and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
will require extra work due to the application tag.

@@ -250,42 +243,6 @@ will require extra work due to the application tag.
    integrity upon completion.


5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata
--------------------------------------------------------------------------

To enable integrity exchange on a block device the gendisk must be
registered as capable:

`int blk_integrity_register(gendisk, blk_integrity);`

The blk_integrity struct is a template and should contain the
following::

      static struct blk_integrity my_profile = {
          .name			= "STANDARDSBODY-TYPE-VARIANT-CSUM",
          .generate_fn		= my_generate_fn,
          .verify_fn		= my_verify_fn,
          .tuple_size		= sizeof(struct my_tuple_size),
          .tag_size		= <tag bytes per hw sector>,
      };

'name' is a text string which will be visible in sysfs. This is
part of the userland API so chose it carefully and never change
it. The format is standards body-type-variant.
E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.

'generate_fn' generates appropriate integrity metadata (for WRITE).

'verify_fn' verifies that the data buffer matches the integrity
metadata.

'tuple_size' must be set to match the size of the integrity
metadata per sector. I.e. 8 for DIF and EPP.

'tag_size' must be set to identify how many bytes of tag space
are available per hardware sector. For DIF this is either 2 or
0 depending on the value of the Control Mode Page ATO bit.

----------------------------------------------------------------------
2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
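For orientation, a minimal sketch of what the new queue_limits-based setup can look like for a bio-based driver, using only field and constant names visible in the hunks of this merge; the concrete values (CRC checksum, 8-byte tuple, 4k block size) are illustrative assumptions, not taken from any particular driver:

#include <linux/blkdev.h>
#include <linux/blk-integrity.h>

/* Sketch: describe T10-PI-style metadata via the integrity sub-structure. */
static struct gendisk *example_integrity_disk(void)
{
	struct queue_limits lim = {
		.logical_block_size	= 4096,		/* illustrative */
		.integrity = {
			.csum_type	= BLK_INTEGRITY_CSUM_CRC,
			.tuple_size	= 8,		/* illustrative */
			.flags		= BLK_INTEGRITY_REF_TAG,
		},
	};

	return blk_alloc_disk(&lim, NUMA_NO_NODE);
}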
@@ -46,41 +46,50 @@ worry if the underlying devices need any explicit cache flushing and how
the Forced Unit Access is implemented. The REQ_PREFLUSH and REQ_FUA flags
may both be set on a single bio.

Implementation details for bio based block drivers
--------------------------------------------------------------

These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
directly below the submit_bio interface. For remapping drivers the REQ_FUA
bits need to be propagated to underlying devices, and a global flush needs
to be implemented for bios with the REQ_PREFLUSH bit set. For real device
drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
data can be completed successfully without doing any work. Drivers for
devices with volatile caches need to implement the support for these
flags themselves without any help from the block layer.

Implementation details for request_fn based block drivers
---------------------------------------------------------
Feature settings for block drivers
----------------------------------

For devices that do not support volatile write caches there is no driver
support required, the block layer completes empty REQ_PREFLUSH requests before
entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
requests that have a payload. For devices with volatile write caches the
driver needs to tell the block layer that it supports flushing caches by
doing::
requests that have a payload.

	blk_queue_write_cache(sdkp->disk->queue, true, false);
For devices with volatile write caches the driver needs to tell the block layer
that it supports flushing caches by setting the

and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn. Note that
REQ_PREFLUSH requests with a payload are automatically turned into a sequence
of an empty REQ_OP_FLUSH request followed by the actual write by the block
layer. For devices that also support the FUA bit the block layer needs
to be told to pass through the REQ_FUA bit using::
   BLK_FEAT_WRITE_CACHE

	blk_queue_write_cache(sdkp->disk->queue, true, true);
flag in the queue_limits feature field. For devices that also support the FUA
bit the block layer needs to be told to pass on the REQ_FUA bit by also setting
the

and the driver must handle write requests that have the REQ_FUA bit set
in prep_fn/request_fn. If the FUA bit is not natively supported the block
layer turns it into an empty REQ_OP_FLUSH request after the actual write.
   BLK_FEAT_FUA

flag in the features field of the queue_limits structure.

Implementation details for bio based block drivers
--------------------------------------------------

For bio based drivers the REQ_PREFLUSH and REQ_FUA bit are simply passed on to
the driver if the driver sets the BLK_FEAT_WRITE_CACHE flag and the driver
needs to handle them.

*NOTE*: The REQ_FUA bit also gets passed on when the BLK_FEAT_FUA flags is
_not_ set. Any bio based driver that sets BLK_FEAT_WRITE_CACHE also needs to
handle REQ_FUA.

For remapping drivers the REQ_FUA bits need to be propagated to underlying
devices, and a global flush needs to be implemented for bios with the
REQ_PREFLUSH bit set.

Implementation details for blk-mq drivers
-----------------------------------------

When the BLK_FEAT_WRITE_CACHE flag is set, REQ_OP_WRITE | REQ_PREFLUSH requests
with a payload are automatically turned into a sequence of a REQ_OP_FLUSH
request followed by the actual write by the block layer.

When the BLK_FEAT_FUA flags is set, the REQ_FUA bit is simply passed on for the
REQ_OP_WRITE request, else a REQ_OP_FLUSH request is sent by the block layer
after the completion of the write request for bio submissions with the REQ_FUA
bit set.
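For reference, a minimal sketch of declaring these features at allocation time, modelled on the driver conversions later in this diff (nfhd, ubd, simdisk); the flag combination is illustrative:

#include <linux/blkdev.h>

/* Sketch: a driver with a volatile write cache and native FUA support. */
static struct gendisk *example_flush_capable_disk(void)
{
	struct queue_limits lim = {
		.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
	};

	/* The driver still has to handle the REQ_OP_FLUSH / REQ_FUA it sees. */
	return blk_alloc_disk(&lim, NUMA_NO_NODE);
}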
MAINTAINERS (14 lines changed):

@@ -3759,6 +3759,20 @@ F:	include/linux/blk*
F:	kernel/trace/blktrace.c
F:	lib/sbitmap.c

BLOCK LAYER DEVICE DRIVER API [RUST]
M:	Andreas Hindborg <a.hindborg@samsung.com>
R:	Boqun Feng <boqun.feng@gmail.com>
L:	linux-block@vger.kernel.org
L:	rust-for-linux@vger.kernel.org
S:	Supported
W:	https://rust-for-linux.com
B:	https://github.com/Rust-for-Linux/linux/issues
C:	https://rust-for-linux.zulipchat.com/#narrow/stream/Block
T:	git https://github.com/Rust-for-Linux/linux.git rust-block-next
F:	drivers/block/rnull.rs
F:	rust/kernel/block.rs
F:	rust/kernel/block/

BLOCK2MTD DRIVER
M:	Joern Engel <joern@lazybastard.org>
L:	linux-mtd@lists.infradead.org
@@ -71,7 +71,7 @@ static void nfhd_submit_bio(struct bio *bio)
		len = bvec.bv_len;
		len >>= 9;
		nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
				page_to_phys(bvec.bv_page) + bvec.bv_offset);
				bvec_phys(&bvec));
		sec += len;
	}
	bio_endio(bio);
@@ -98,6 +98,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 bsize)
{
	struct queue_limits lim = {
		.logical_block_size	= bsize,
		.features		= BLK_FEAT_ROTATIONAL,
	};
	struct nfhd_device *dev;
	int dev_id = id - NFHD_DEV_OFFSET;
@@ -447,43 +447,31 @@ static int bulk_req_safe_read(
	return n;
}

/* Called without dev->lock held, and only in interrupt context. */
static void ubd_handler(void)
static void ubd_end_request(struct io_thread_req *io_req)
{
	int n;
	int count;

	while(1){
		n = bulk_req_safe_read(
			thread_fd,
			irq_req_buffer,
			&irq_remainder,
			&irq_remainder_size,
			UBD_REQ_BUFFER_SIZE
		);
		if (n < 0) {
			if(n == -EAGAIN)
				break;
			printk(KERN_ERR "spurious interrupt in ubd_handler, "
			       "err = %d\n", -n);
			return;
		}
		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
			struct io_thread_req *io_req = (*irq_req_buffer)[count];

			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
				blk_queue_max_discard_sectors(io_req->req->q, 0);
				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
	if (io_req->error == BLK_STS_NOTSUPP) {
		if (req_op(io_req->req) == REQ_OP_DISCARD)
			blk_queue_disable_discard(io_req->req->q);
		else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
			blk_queue_disable_write_zeroes(io_req->req->q);
	}
	blk_mq_end_request(io_req->req, io_req->error);
	kfree(io_req);
		}
	}
}

static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	int len, i;

	while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
			&irq_remainder, &irq_remainder_size,
			UBD_REQ_BUFFER_SIZE)) >= 0) {
		for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
			ubd_end_request((*irq_req_buffer)[i]);
	}

	if (len < 0 && len != -EAGAIN)
		pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
	return IRQ_HANDLED;
}

@@ -847,6 +835,7 @@ static int ubd_add(int n, char **error_out)
	struct queue_limits lim = {
		.max_segments		= MAX_SG,
		.seg_boundary_mask	= PAGE_SIZE - 1,
		.features		= BLK_FEAT_WRITE_CACHE,
	};
	struct gendisk *disk;
	int err = 0;
@@ -893,8 +882,6 @@ static int ubd_add(int n, char **error_out)
		goto out_cleanup_tags;
	}

	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
	blk_queue_write_cache(disk->queue, true, false);
	disk->major = UBD_MAJOR;
	disk->first_minor = n << UBD_SHIFT;
	disk->minors = 1 << UBD_SHIFT;
@@ -263,6 +263,9 @@ static const struct proc_ops simdisk_proc_ops = {
static int __init simdisk_setup(struct simdisk *dev, int which,
		struct proc_dir_entry *procdir)
{
	struct queue_limits lim = {
		.features = BLK_FEAT_ROTATIONAL,
	};
	char tmp[2] = { '0' + which, 0 };
	int err;

@@ -271,7 +274,7 @@ static int __init simdisk_setup(struct simdisk *dev, int which,
	spin_lock_init(&dev->lock);
	dev->users = 0;

	dev->gd = blk_alloc_disk(NULL, NUMA_NO_NODE);
	dev->gd = blk_alloc_disk(&lim, NUMA_NO_NODE);
	if (IS_ERR(dev->gd)) {
		err = PTR_ERR(dev->gd);
		goto out;
@@ -62,6 +62,8 @@ config BLK_DEV_BSGLIB

config BLK_DEV_INTEGRITY
	bool "Block layer data integrity support"
	select CRC_T10DIF
	select CRC64_ROCKSOFT
	help
	  Some storage devices allow extra information to be
	  stored/retrieved to help protect the data. The block layer
@@ -72,12 +74,6 @@ config BLK_DEV_INTEGRITY
	  T10/SCSI Data Integrity Field or the T13/ATA External Path
	  Protection. If in doubt, say N.

config BLK_DEV_INTEGRITY_T10
	tristate
	depends on BLK_DEV_INTEGRITY
	select CRC_T10DIF
	select CRC64_ROCKSOFT

config BLK_DEV_WRITE_MOUNTED
	bool "Allow writing to mounted block devices"
	default y
@@ -26,8 +26,7 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ)	+= bfq.o

obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o
obj-$(CONFIG_BLK_DEV_INTEGRITY_T10)	+= t10-pi.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI)	+= blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO)	+= blk-mq-virtio.o
obj-$(CONFIG_BLK_DEV_ZONED)	+= blk-zoned.o
block/bdev.c (35 lines changed):

@@ -385,7 +385,7 @@ static struct file_system_type bd_type = {
};

struct super_block *blockdev_superblock __ro_after_init;
struct vfsmount *blockdev_mnt __ro_after_init;
static struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock);

void __init bdev_cache_init(void)
@@ -1260,23 +1260,42 @@ void sync_bdevs(bool wait)
}

/*
 * Handle STATX_DIOALIGN for block devices.
 *
 * Note that the inode passed to this is the inode of a block device node file,
 * not the block device's internal inode. Therefore it is *not* valid to use
 * I_BDEV() here; the block device has to be looked up by i_rdev instead.
 * Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
 */
void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
void bdev_statx(struct path *path, struct kstat *stat,
		u32 request_mask)
{
	struct inode *backing_inode;
	struct block_device *bdev;

	bdev = blkdev_get_no_open(inode->i_rdev);
	if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
		return;

	backing_inode = d_backing_inode(path->dentry);

	/*
	 * Note that backing_inode is the inode of a block device node file,
	 * not the block device's internal inode. Therefore it is *not* valid
	 * to use I_BDEV() here; the block device has to be looked up by i_rdev
	 * instead.
	 */
	bdev = blkdev_get_no_open(backing_inode->i_rdev);
	if (!bdev)
		return;

	if (request_mask & STATX_DIOALIGN) {
		stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
		stat->dio_offset_align = bdev_logical_block_size(bdev);
		stat->result_mask |= STATX_DIOALIGN;
	}

	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
		struct request_queue *bd_queue = bdev->bd_queue;

		generic_fill_statx_atomic_writes(stat,
			queue_atomic_write_unit_min_bytes(bd_queue),
			queue_atomic_write_unit_max_bytes(bd_queue));
	}

	blkdev_put_no_open(bdev);
}
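The statx plumbing above is what makes the atomic write path discoverable from userspace. A sketch of issuing one atomic write against a block device, assuming 6.11-era uapi headers that define RWF_ATOMIC; the device path and the 4096-byte unit are illustrative and must respect the advertised limits:

/*
 * Sketch only: one all-or-nothing write of a single atomic_write_unit
 * using pwritev2() with RWF_ATOMIC.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/nvme0n1", O_WRONLY | O_DIRECT);	/* illustrative */
	struct iovec iov;
	void *buf;

	if (fd < 0 || posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 0xab, 4096);
	iov.iov_base = buf;
	iov.iov_len = 4096;

	/* Length and offset must be aligned to the reported atomic unit. */
	if (pwritev2(fd, &iov, 1, 0, RWF_ATOMIC) != 4096)
		perror("pwritev2(RWF_ATOMIC)");
	close(fd);
	return 0;
}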
@ -797,57 +797,6 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
|
||||
*/
|
||||
bfq_link_bfqg(bfqd, bfqg);
|
||||
__bfq_bic_change_cgroup(bfqd, bic, bfqg);
|
||||
/*
|
||||
* Update blkg_path for bfq_log_* functions. We cache this
|
||||
* path, and update it here, for the following
|
||||
* reasons. Operations on blkg objects in blk-cgroup are
|
||||
* protected with the request_queue lock, and not with the
|
||||
* lock that protects the instances of this scheduler
|
||||
* (bfqd->lock). This exposes BFQ to the following sort of
|
||||
* race.
|
||||
*
|
||||
* The blkg_lookup performed in bfq_get_queue, protected
|
||||
* through rcu, may happen to return the address of a copy of
|
||||
* the original blkg. If this is the case, then the
|
||||
* bfqg_and_blkg_get performed in bfq_get_queue, to pin down
|
||||
* the blkg, is useless: it does not prevent blk-cgroup code
|
||||
* from destroying both the original blkg and all objects
|
||||
* directly or indirectly referred by the copy of the
|
||||
* blkg.
|
||||
*
|
||||
* On the bright side, destroy operations on a blkg invoke, as
|
||||
* a first step, hooks of the scheduler associated with the
|
||||
* blkg. And these hooks are executed with bfqd->lock held for
|
||||
* BFQ. As a consequence, for any blkg associated with the
|
||||
* request queue this instance of the scheduler is attached
|
||||
* to, we are guaranteed that such a blkg is not destroyed, and
|
||||
* that all the pointers it contains are consistent, while we
|
||||
* are holding bfqd->lock. A blkg_lookup performed with
|
||||
* bfqd->lock held then returns a fully consistent blkg, which
|
||||
* remains consistent until this lock is held.
|
||||
*
|
||||
* Thanks to the last fact, and to the fact that: (1) bfqg has
|
||||
* been obtained through a blkg_lookup in the above
|
||||
* assignment, and (2) bfqd->lock is being held, here we can
|
||||
* safely use the policy data for the involved blkg (i.e., the
|
||||
* field bfqg->pd) to get to the blkg associated with bfqg,
|
||||
* and then we can safely use any field of blkg. After we
|
||||
* release bfqd->lock, even just getting blkg through this
|
||||
* bfqg may cause dangling references to be traversed, as
|
||||
* bfqg->pd may not exist any more.
|
||||
*
|
||||
* In view of the above facts, here we cache, in the bfqg, any
|
||||
* blkg data we may need for this bic, and for its associated
|
||||
* bfq_queue. As of now, we need to cache only the path of the
|
||||
* blkg, which is used in the bfq_log_* functions.
|
||||
*
|
||||
* Finally, note that bfqg itself needs to be protected from
|
||||
* destruction on the blkg_free of the original blkg (which
|
||||
* invokes bfq_pd_free). We use an additional private
|
||||
* refcounter for bfqg, to let it disappear only after no
|
||||
* bfq_queue refers to it any longer.
|
||||
*/
|
||||
blkg_path(bfqg_to_blkg(bfqg), bfqg->blkg_path, sizeof(bfqg->blkg_path));
|
||||
bic->blkcg_serial_nr = serial_nr;
|
||||
}
|
||||
|
||||
|
@ -5463,29 +5463,10 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync,
|
||||
}
|
||||
}
|
||||
|
||||
static void bfq_exit_icq(struct io_cq *icq)
|
||||
static void _bfq_exit_icq(struct bfq_io_cq *bic, unsigned int num_actuators)
|
||||
{
|
||||
struct bfq_io_cq *bic = icq_to_bic(icq);
|
||||
struct bfq_data *bfqd = bic_to_bfqd(bic);
|
||||
unsigned long flags;
|
||||
unsigned int act_idx;
|
||||
/*
|
||||
* If bfqd and thus bfqd->num_actuators is not available any
|
||||
* longer, then cycle over all possible per-actuator bfqqs in
|
||||
* next loop. We rely on bic being zeroed on creation, and
|
||||
* therefore on its unused per-actuator fields being NULL.
|
||||
*/
|
||||
unsigned int num_actuators = BFQ_MAX_ACTUATORS;
|
||||
struct bfq_iocq_bfqq_data *bfqq_data = bic->bfqq_data;
|
||||
|
||||
/*
|
||||
* bfqd is NULL if scheduler already exited, and in that case
|
||||
* this is the last time these queues are accessed.
|
||||
*/
|
||||
if (bfqd) {
|
||||
spin_lock_irqsave(&bfqd->lock, flags);
|
||||
num_actuators = bfqd->num_actuators;
|
||||
}
|
||||
unsigned int act_idx;
|
||||
|
||||
for (act_idx = 0; act_idx < num_actuators; act_idx++) {
|
||||
if (bfqq_data[act_idx].stable_merge_bfqq)
|
||||
@ -5494,9 +5475,30 @@ static void bfq_exit_icq(struct io_cq *icq)
|
||||
bfq_exit_icq_bfqq(bic, true, act_idx);
|
||||
bfq_exit_icq_bfqq(bic, false, act_idx);
|
||||
}
|
||||
}
|
||||
|
||||
if (bfqd)
|
||||
static void bfq_exit_icq(struct io_cq *icq)
|
||||
{
|
||||
struct bfq_io_cq *bic = icq_to_bic(icq);
|
||||
struct bfq_data *bfqd = bic_to_bfqd(bic);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If bfqd and thus bfqd->num_actuators is not available any
|
||||
* longer, then cycle over all possible per-actuator bfqqs in
|
||||
* next loop. We rely on bic being zeroed on creation, and
|
||||
* therefore on its unused per-actuator fields being NULL.
|
||||
*
|
||||
* bfqd is NULL if scheduler already exited, and in that case
|
||||
* this is the last time these queues are accessed.
|
||||
*/
|
||||
if (bfqd) {
|
||||
spin_lock_irqsave(&bfqd->lock, flags);
|
||||
_bfq_exit_icq(bic, bfqd->num_actuators);
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
} else {
|
||||
_bfq_exit_icq(bic, BFQ_MAX_ACTUATORS);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1003,9 +1003,6 @@ struct bfq_group {
|
||||
/* must be the first member */
|
||||
struct blkg_policy_data pd;
|
||||
|
||||
/* cached path for this blkg (see comments in bfq_bic_update_cgroup) */
|
||||
char blkg_path[128];
|
||||
|
||||
/* reference counter (see comments in bfq_bic_update_cgroup) */
|
||||
refcount_t ref;
|
||||
|
||||
|
@ -76,7 +76,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
|
||||
&bip->bip_max_vcnt, gfp_mask);
|
||||
if (!bip->bip_vec)
|
||||
goto err;
|
||||
} else {
|
||||
} else if (nr_vecs) {
|
||||
bip->bip_vec = bip->bip_inline_vecs;
|
||||
}
|
||||
|
||||
@ -276,6 +276,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
|
||||
|
||||
bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
|
||||
bip->bip_iter.bi_sector = seed;
|
||||
bip->bip_vcnt = nr_vecs;
|
||||
return 0;
|
||||
free_bip:
|
||||
bio_integrity_free(bio);
|
||||
@ -297,6 +298,7 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
|
||||
bip->bip_flags |= BIP_INTEGRITY_USER;
|
||||
bip->bip_iter.bi_sector = seed;
|
||||
bip->bip_iter.bi_size = len;
|
||||
bip->bip_vcnt = nr_vecs;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -334,7 +336,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
|
||||
u32 seed)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
|
||||
unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
|
||||
struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
|
||||
struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
|
||||
unsigned int direction, nr_bvecs;
|
||||
@ -396,44 +398,6 @@ free_bvec:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
|
||||
|
||||
/**
|
||||
* bio_integrity_process - Process integrity metadata for a bio
|
||||
* @bio: bio to generate/verify integrity metadata for
|
||||
* @proc_iter: iterator to process
|
||||
* @proc_fn: Pointer to the relevant processing function
|
||||
*/
|
||||
static blk_status_t bio_integrity_process(struct bio *bio,
|
||||
struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn)
|
||||
{
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
struct blk_integrity_iter iter;
|
||||
struct bvec_iter bviter;
|
||||
struct bio_vec bv;
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
|
||||
iter.interval = 1 << bi->interval_exp;
|
||||
iter.tuple_size = bi->tuple_size;
|
||||
iter.seed = proc_iter->bi_sector;
|
||||
iter.prot_buf = bvec_virt(bip->bip_vec);
|
||||
iter.pi_offset = bi->pi_offset;
|
||||
|
||||
__bio_for_each_segment(bv, bio, bviter, *proc_iter) {
|
||||
void *kaddr = bvec_kmap_local(&bv);
|
||||
|
||||
iter.data_buf = kaddr;
|
||||
iter.data_size = bv.bv_len;
|
||||
ret = proc_fn(&iter);
|
||||
kunmap_local(kaddr);
|
||||
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* bio_integrity_prep - Prepare bio for integrity I/O
|
||||
* @bio: bio to prepare
|
||||
@ -450,17 +414,13 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
{
|
||||
struct bio_integrity_payload *bip;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
unsigned int len;
|
||||
void *buf;
|
||||
unsigned long start, end;
|
||||
unsigned int len, nr_pages;
|
||||
unsigned int bytes, offset, i;
|
||||
gfp_t gfp = GFP_NOIO;
|
||||
|
||||
if (!bi)
|
||||
return true;
|
||||
|
||||
if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
|
||||
return true;
|
||||
|
||||
if (!bio_sectors(bio))
|
||||
return true;
|
||||
|
||||
@ -468,32 +428,36 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
if (bio_integrity(bio))
|
||||
return true;
|
||||
|
||||
if (bio_data_dir(bio) == READ) {
|
||||
if (!bi->profile->verify_fn ||
|
||||
!(bi->flags & BLK_INTEGRITY_VERIFY))
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_READ:
|
||||
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
|
||||
return true;
|
||||
} else {
|
||||
if (!bi->profile->generate_fn ||
|
||||
!(bi->flags & BLK_INTEGRITY_GENERATE))
|
||||
break;
|
||||
case REQ_OP_WRITE:
|
||||
if (bi->flags & BLK_INTEGRITY_NOGENERATE)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Zero the memory allocated to not leak uninitialized kernel
|
||||
* memory to disk for non-integrity metadata where nothing else
|
||||
* initializes the memory.
|
||||
*/
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
|
||||
gfp |= __GFP_ZERO;
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Allocate kernel buffer for protection data */
|
||||
len = bio_integrity_bytes(bi, bio_sectors(bio));
|
||||
buf = kmalloc(len, GFP_NOIO);
|
||||
buf = kmalloc(len, gfp);
|
||||
if (unlikely(buf == NULL)) {
|
||||
printk(KERN_ERR "could not allocate integrity buffer\n");
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
start = ((unsigned long) buf) >> PAGE_SHIFT;
|
||||
nr_pages = end - start;
|
||||
|
||||
/* Allocate bio integrity payload and integrity vectors */
|
||||
bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
|
||||
bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
|
||||
if (IS_ERR(bip)) {
|
||||
printk(KERN_ERR "could not allocate data integrity bioset\n");
|
||||
kfree(buf);
|
||||
goto err_end_io;
|
||||
}
|
||||
@ -501,35 +465,20 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
|
||||
bip_set_seed(bip, bio->bi_iter.bi_sector);
|
||||
|
||||
if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM)
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
|
||||
bip->bip_flags |= BIP_IP_CHECKSUM;
|
||||
|
||||
/* Map it */
|
||||
offset = offset_in_page(buf);
|
||||
for (i = 0; i < nr_pages && len > 0; i++) {
|
||||
bytes = PAGE_SIZE - offset;
|
||||
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
|
||||
if (bio_integrity_add_page(bio, virt_to_page(buf),
|
||||
bytes, offset) < bytes) {
|
||||
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
|
||||
offset_in_page(buf)) < len) {
|
||||
printk(KERN_ERR "could not attach integrity payload\n");
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
buf += bytes;
|
||||
len -= bytes;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
/* Auto-generate integrity metadata if this is a write */
|
||||
if (bio_data_dir(bio) == WRITE) {
|
||||
bio_integrity_process(bio, &bio->bi_iter,
|
||||
bi->profile->generate_fn);
|
||||
} else {
|
||||
if (bio_data_dir(bio) == WRITE)
|
||||
blk_integrity_generate(bio);
|
||||
else
|
||||
bip->bio_iter = bio->bi_iter;
|
||||
}
|
||||
return true;
|
||||
|
||||
err_end_io:
|
||||
@ -552,15 +501,8 @@ static void bio_integrity_verify_fn(struct work_struct *work)
|
||||
struct bio_integrity_payload *bip =
|
||||
container_of(work, struct bio_integrity_payload, bip_work);
|
||||
struct bio *bio = bip->bip_bio;
|
||||
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
||||
|
||||
/*
|
||||
* At the moment verify is called bio's iterator was advanced
|
||||
* during split and completion, we need to rewind iterator to
|
||||
* it's original position.
|
||||
*/
|
||||
bio->bi_status = bio_integrity_process(bio, &bip->bio_iter,
|
||||
bi->profile->verify_fn);
|
||||
blk_integrity_verify(bio);
|
||||
bio_integrity_free(bio);
|
||||
bio_endio(bio);
|
||||
}
|
||||
@ -582,7 +524,7 @@ bool __bio_integrity_endio(struct bio *bio)
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
|
||||
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
|
||||
(bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
|
||||
(bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) {
|
||||
INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
|
||||
queue_work(kintegrityd_wq, &bip->bip_work);
|
||||
return false;
|
||||
@ -642,14 +584,11 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
|
||||
|
||||
BUG_ON(bip_src == NULL);
|
||||
|
||||
bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
|
||||
bip = bio_integrity_alloc(bio, gfp_mask, 0);
|
||||
if (IS_ERR(bip))
|
||||
return PTR_ERR(bip);
|
||||
|
||||
memcpy(bip->bip_vec, bip_src->bip_vec,
|
||||
bip_src->bip_vcnt * sizeof(struct bio_vec));
|
||||
|
||||
bip->bip_vcnt = bip_src->bip_vcnt;
|
||||
bip->bip_vec = bip_src->bip_vec;
|
||||
bip->bip_iter = bip_src->bip_iter;
|
||||
bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
|
||||
|
||||
|
@ -953,7 +953,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
|
||||
bool *same_page)
|
||||
{
|
||||
unsigned long mask = queue_segment_boundary(q);
|
||||
phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
|
||||
phys_addr_t addr1 = bvec_phys(bv);
|
||||
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
|
||||
|
||||
if ((addr1 | mask) != (addr2 | mask))
|
||||
|
@ -300,19 +300,6 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
|
||||
return cpd ? cpd->blkcg : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_path - format cgroup path of blkg
|
||||
* @blkg: blkg of interest
|
||||
* @buf: target buffer
|
||||
* @buflen: target buffer length
|
||||
*
|
||||
* Format the path of the cgroup of @blkg into @buf.
|
||||
*/
|
||||
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
|
||||
{
|
||||
return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_get - get a blkg reference
|
||||
* @blkg: blkg to get
|
||||
|
@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_flag_clear);
|
||||
|
||||
/**
|
||||
* blk_queue_flag_test_and_set - atomically test and set a queue flag
|
||||
* @flag: flag to be set
|
||||
* @q: request queue
|
||||
*
|
||||
* Returns the previous value of @flag - 0 if the flag was not set and 1 if
|
||||
* the flag was already set.
|
||||
*/
|
||||
bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
|
||||
{
|
||||
return test_and_set_bit(flag, &q->queue_flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
|
||||
|
||||
#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
|
||||
static const char *const blk_op_name[] = {
|
||||
REQ_OP_NAME(READ),
|
||||
@ -174,6 +160,8 @@ static const struct {
|
||||
/* Command duration limit device-side timeout */
|
||||
[BLK_STS_DURATION_LIMIT] = { -ETIME, "duration limit exceeded" },
|
||||
|
||||
[BLK_STS_INVAL] = { -EINVAL, "invalid" },
|
||||
|
||||
/* everything else not covered above: */
|
||||
[BLK_STS_IOERR] = { -EIO, "I/O" },
|
||||
};
|
||||
@ -739,6 +727,18 @@ void submit_bio_noacct_nocheck(struct bio *bio)
|
||||
__submit_bio_noacct(bio);
|
||||
}
|
||||
|
||||
static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
{
|
||||
if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
|
||||
return BLK_STS_INVAL;
|
||||
|
||||
if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
|
||||
return BLK_STS_INVAL;
|
||||
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
|
||||
* @bio: The bio describing the location in memory and on the device.
|
||||
@ -782,7 +782,7 @@ void submit_bio_noacct(struct bio *bio)
|
||||
if (WARN_ON_ONCE(bio_op(bio) != REQ_OP_WRITE &&
|
||||
bio_op(bio) != REQ_OP_ZONE_APPEND))
|
||||
goto end_io;
|
||||
if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
|
||||
if (!bdev_write_cache(bdev)) {
|
||||
bio->bi_opf &= ~(REQ_PREFLUSH | REQ_FUA);
|
||||
if (!bio_sectors(bio)) {
|
||||
status = BLK_STS_OK;
|
||||
@ -791,12 +791,17 @@ void submit_bio_noacct(struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
|
||||
if (!(q->limits.features & BLK_FEAT_POLL))
|
||||
bio_clear_polled(bio);
|
||||
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
if (bio->bi_opf & REQ_ATOMIC) {
|
||||
status = blk_validate_atomic_write_op_size(q, bio);
|
||||
if (status != BLK_STS_OK)
|
||||
goto end_io;
|
||||
}
|
||||
break;
|
||||
case REQ_OP_FLUSH:
|
||||
/*
|
||||
@ -825,11 +830,8 @@ void submit_bio_noacct(struct bio *bio)
|
||||
case REQ_OP_ZONE_OPEN:
|
||||
case REQ_OP_ZONE_CLOSE:
|
||||
case REQ_OP_ZONE_FINISH:
|
||||
if (!bdev_is_zoned(bio->bi_bdev))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
|
||||
if (!bdev_is_zoned(bio->bi_bdev))
|
||||
goto not_supported;
|
||||
break;
|
||||
case REQ_OP_DRV_IN:
|
||||
@ -915,8 +917,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
|
||||
return 0;
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (cookie == BLK_QC_T_NONE ||
|
||||
!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
|
||||
if (cookie == BLK_QC_T_NONE || !(q->limits.features & BLK_FEAT_POLL))
|
||||
return 0;
|
||||
|
||||
blk_flush_plug(current->plug, false);
|
||||
|
@ -100,23 +100,6 @@ blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
|
||||
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
|
||||
}
|
||||
|
||||
static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
|
||||
{
|
||||
unsigned int policy = 0;
|
||||
|
||||
if (blk_rq_sectors(rq))
|
||||
policy |= REQ_FSEQ_DATA;
|
||||
|
||||
if (fflags & (1UL << QUEUE_FLAG_WC)) {
|
||||
if (rq->cmd_flags & REQ_PREFLUSH)
|
||||
policy |= REQ_FSEQ_PREFLUSH;
|
||||
if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
|
||||
(rq->cmd_flags & REQ_FUA))
|
||||
policy |= REQ_FSEQ_POSTFLUSH;
|
||||
}
|
||||
return policy;
|
||||
}
|
||||
|
||||
static unsigned int blk_flush_cur_seq(struct request *rq)
|
||||
{
|
||||
return 1 << ffz(rq->flush.seq);
|
||||
@ -399,19 +382,32 @@ static void blk_rq_init_flush(struct request *rq)
|
||||
bool blk_insert_flush(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned long fflags = q->queue_flags; /* may change, cache */
|
||||
unsigned int policy = blk_flush_policy(fflags, rq);
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, rq->mq_ctx);
|
||||
bool supports_fua = q->limits.features & BLK_FEAT_FUA;
|
||||
unsigned int policy = 0;
|
||||
|
||||
/* FLUSH/FUA request must never be merged */
|
||||
WARN_ON_ONCE(rq->bio != rq->biotail);
|
||||
|
||||
if (blk_rq_sectors(rq))
|
||||
policy |= REQ_FSEQ_DATA;
|
||||
|
||||
/*
|
||||
* Check which flushes we need to sequence for this operation.
|
||||
*/
|
||||
if (blk_queue_write_cache(q)) {
|
||||
if (rq->cmd_flags & REQ_PREFLUSH)
|
||||
policy |= REQ_FSEQ_PREFLUSH;
|
||||
if ((rq->cmd_flags & REQ_FUA) && !supports_fua)
|
||||
policy |= REQ_FSEQ_POSTFLUSH;
|
||||
}
|
||||
|
||||
/*
|
||||
* @policy now records what operations need to be done. Adjust
|
||||
* REQ_PREFLUSH and FUA for the driver.
|
||||
*/
|
||||
rq->cmd_flags &= ~REQ_PREFLUSH;
|
||||
if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
|
||||
if (!supports_fua)
|
||||
rq->cmd_flags &= ~REQ_FUA;
|
||||
|
||||
/*
|
||||
|
@ -107,60 +107,6 @@ new_segment:
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
|
||||
|
||||
/**
|
||||
* blk_integrity_compare - Compare integrity profile of two disks
|
||||
* @gd1: Disk to compare
|
||||
* @gd2: Disk to compare
|
||||
*
|
||||
* Description: Meta-devices like DM and MD need to verify that all
|
||||
* sub-devices use the same integrity format before advertising to
|
||||
* upper layers that they can send/receive integrity metadata. This
|
||||
* function can be used to check whether two gendisk devices have
|
||||
* compatible integrity formats.
|
||||
*/
|
||||
int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
|
||||
{
|
||||
struct blk_integrity *b1 = &gd1->queue->integrity;
|
||||
struct blk_integrity *b2 = &gd2->queue->integrity;
|
||||
|
||||
if (!b1->profile && !b2->profile)
|
||||
return 0;
|
||||
|
||||
if (!b1->profile || !b2->profile)
|
||||
return -1;
|
||||
|
||||
if (b1->interval_exp != b2->interval_exp) {
|
||||
pr_err("%s: %s/%s protection interval %u != %u\n",
|
||||
__func__, gd1->disk_name, gd2->disk_name,
|
||||
1 << b1->interval_exp, 1 << b2->interval_exp);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (b1->tuple_size != b2->tuple_size) {
|
||||
pr_err("%s: %s/%s tuple sz %u != %u\n", __func__,
|
||||
gd1->disk_name, gd2->disk_name,
|
||||
b1->tuple_size, b2->tuple_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
|
||||
pr_err("%s: %s/%s tag sz %u != %u\n", __func__,
|
||||
gd1->disk_name, gd2->disk_name,
|
||||
b1->tag_size, b2->tag_size);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (b1->profile != b2->profile) {
|
||||
pr_err("%s: %s/%s type %s != %s\n", __func__,
|
||||
gd1->disk_name, gd2->disk_name,
|
||||
b1->profile->name, b2->profile->name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_compare);
|
||||
|
||||
bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
|
||||
struct request *next)
|
||||
{
|
||||
@ -214,7 +160,64 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
|
||||
|
||||
static inline struct blk_integrity *dev_to_bi(struct device *dev)
|
||||
{
|
||||
return &dev_to_disk(dev)->queue->integrity;
|
||||
return &dev_to_disk(dev)->queue->limits.integrity;
|
||||
}
|
||||
|
||||
const char *blk_integrity_profile_name(struct blk_integrity *bi)
|
||||
{
|
||||
switch (bi->csum_type) {
|
||||
case BLK_INTEGRITY_CSUM_IP:
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
return "T10-DIF-TYPE1-IP";
|
||||
return "T10-DIF-TYPE3-IP";
|
||||
case BLK_INTEGRITY_CSUM_CRC:
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
return "T10-DIF-TYPE1-CRC";
|
||||
return "T10-DIF-TYPE3-CRC";
|
||||
case BLK_INTEGRITY_CSUM_CRC64:
|
||||
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
||||
return "EXT-DIF-TYPE1-CRC64";
|
||||
return "EXT-DIF-TYPE3-CRC64";
|
||||
case BLK_INTEGRITY_CSUM_NONE:
|
||||
break;
|
||||
}
|
||||
|
||||
return "nop";
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_integrity_profile_name);
|
||||
|
||||
static ssize_t flag_store(struct device *dev, const char *page, size_t count,
|
||||
unsigned char flag)
|
||||
{
|
||||
struct request_queue *q = dev_to_disk(dev)->queue;
|
||||
struct queue_limits lim;
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
err = kstrtoul(page, 10, &val);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* note that the flags are inverted vs the values in the sysfs files */
|
||||
lim = queue_limits_start_update(q);
|
||||
if (val)
|
||||
lim.integrity.flags &= ~flag;
|
||||
else
|
||||
lim.integrity.flags |= flag;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
if (err)
|
||||
return err;
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t flag_show(struct device *dev, char *page, unsigned char flag)
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
|
||||
return sysfs_emit(page, "%d\n", !(bi->flags & flag));
|
||||
}
|
||||
|
||||
static ssize_t format_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -222,9 +225,9 @@ static ssize_t format_show(struct device *dev, struct device_attribute *attr,
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
|
||||
if (bi->profile && bi->profile->name)
|
||||
return sysfs_emit(page, "%s\n", bi->profile->name);
|
||||
if (!bi->tuple_size)
|
||||
return sysfs_emit(page, "none\n");
|
||||
return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi));
|
||||
}
|
||||
|
||||
static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr,
|
||||
@ -249,49 +252,26 @@ static ssize_t read_verify_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
char *p = (char *) page;
|
||||
unsigned long val = simple_strtoul(p, &p, 10);
|
||||
|
||||
if (val)
|
||||
bi->flags |= BLK_INTEGRITY_VERIFY;
|
||||
else
|
||||
bi->flags &= ~BLK_INTEGRITY_VERIFY;
|
||||
|
||||
return count;
|
||||
return flag_store(dev, page, count, BLK_INTEGRITY_NOVERIFY);
|
||||
}
|
||||
|
||||
static ssize_t read_verify_show(struct device *dev,
|
||||
struct device_attribute *attr, char *page)
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
|
||||
return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_VERIFY));
|
||||
return flag_show(dev, page, BLK_INTEGRITY_NOVERIFY);
|
||||
}
|
||||
|
||||
static ssize_t write_generate_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
|
||||
char *p = (char *) page;
|
||||
unsigned long val = simple_strtoul(p, &p, 10);
|
||||
|
||||
if (val)
|
||||
bi->flags |= BLK_INTEGRITY_GENERATE;
|
||||
else
|
||||
bi->flags &= ~BLK_INTEGRITY_GENERATE;
|
||||
|
||||
return count;
|
||||
return flag_store(dev, page, count, BLK_INTEGRITY_NOGENERATE);
|
||||
}
|
||||
|
||||
static ssize_t write_generate_show(struct device *dev,
|
||||
struct device_attribute *attr, char *page)
|
||||
{
|
||||
struct blk_integrity *bi = dev_to_bi(dev);
|
||||
|
||||
return sysfs_emit(page, "%d\n", !!(bi->flags & BLK_INTEGRITY_GENERATE));
|
||||
return flag_show(dev, page, BLK_INTEGRITY_NOGENERATE);
|
||||
}
|
||||
|
||||
static ssize_t device_is_integrity_capable_show(struct device *dev,
|
||||
@ -325,81 +305,3 @@ const struct attribute_group blk_integrity_attr_group = {
|
||||
.name = "integrity",
|
||||
.attrs = integrity_attrs,
|
||||
};
|
||||
|
||||
static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
|
||||
{
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void blk_integrity_nop_prepare(struct request *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void blk_integrity_nop_complete(struct request *rq,
|
||||
unsigned int nr_bytes)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct blk_integrity_profile nop_profile = {
|
||||
.name = "nop",
|
||||
.generate_fn = blk_integrity_nop_fn,
|
||||
.verify_fn = blk_integrity_nop_fn,
|
||||
.prepare_fn = blk_integrity_nop_prepare,
|
||||
.complete_fn = blk_integrity_nop_complete,
|
||||
};
|
||||
|
||||
/**
|
||||
* blk_integrity_register - Register a gendisk as being integrity-capable
|
||||
* @disk: struct gendisk pointer to make integrity-aware
|
||||
* @template: block integrity profile to register
|
||||
*
|
||||
* Description: When a device needs to advertise itself as being able to
|
||||
* send/receive integrity metadata it must use this function to register
|
||||
* the capability with the block layer. The template is a blk_integrity
|
||||
* struct with values appropriate for the underlying hardware. See
|
||||
* Documentation/block/data-integrity.rst.
|
||||
*/
|
||||
void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
|
||||
{
|
||||
struct blk_integrity *bi = &disk->queue->integrity;
|
||||
|
||||
bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE |
|
||||
template->flags;
|
||||
bi->interval_exp = template->interval_exp ? :
|
||||
ilog2(queue_logical_block_size(disk->queue));
|
||||
bi->profile = template->profile ? template->profile : &nop_profile;
|
||||
bi->tuple_size = template->tuple_size;
|
||||
bi->tag_size = template->tag_size;
|
||||
bi->pi_offset = template->pi_offset;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
|
||||
if (disk->queue->crypto_profile) {
|
||||
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
|
||||
disk->queue->crypto_profile = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_register);
|
||||
|
||||
/**
|
||||
* blk_integrity_unregister - Unregister block integrity profile
|
||||
* @disk: disk whose integrity profile to unregister
|
||||
*
|
||||
* Description: This function unregisters the integrity capability from
|
||||
* a block device.
|
||||
*/
|
||||
void blk_integrity_unregister(struct gendisk *disk)
|
||||
{
|
||||
struct blk_integrity *bi = &disk->queue->integrity;
|
||||
|
||||
if (!bi->profile)
|
||||
return;
|
||||
|
||||
/* ensure all bios are off the integrity workqueue */
|
||||
blk_flush_integrity();
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
memset(bi, 0, sizeof(*bi));
|
||||
}
|
||||
EXPORT_SYMBOL(blk_integrity_unregister);
|
||||
|
210
block/blk-lib.c
210
block/blk-lib.c
@ -103,38 +103,71 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
|
||||
}
|
||||
EXPORT_SYMBOL(blkdev_issue_discard);
|
||||
|
||||
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
|
||||
static sector_t bio_write_zeroes_limit(struct block_device *bdev)
|
||||
{
|
||||
sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
|
||||
|
||||
return min(bdev_write_zeroes_sectors(bdev),
|
||||
(UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
|
||||
}
|
||||
|
||||
static void __blkdev_issue_write_zeroes(struct block_device *bdev,
|
||||
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
|
||||
struct bio **biop, unsigned flags)
|
||||
{
|
||||
struct bio *bio = *biop;
|
||||
unsigned int max_sectors;
|
||||
|
||||
if (bdev_read_only(bdev))
|
||||
return -EPERM;
|
||||
|
||||
/* Ensure that max_sectors doesn't overflow bi_size */
|
||||
max_sectors = bdev_write_zeroes_sectors(bdev);
|
||||
|
||||
if (max_sectors == 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
while (nr_sects) {
|
||||
unsigned int len = min_t(sector_t, nr_sects, max_sectors);
|
||||
unsigned int len = min_t(sector_t, nr_sects,
|
||||
bio_write_zeroes_limit(bdev));
|
||||
struct bio *bio;
|
||||
|
||||
bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
|
||||
if ((flags & BLKDEV_ZERO_KILLABLE) &&
|
||||
fatal_signal_pending(current))
|
||||
break;
|
||||
|
||||
bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
|
||||
bio->bi_iter.bi_sector = sector;
|
||||
if (flags & BLKDEV_ZERO_NOUNMAP)
|
||||
bio->bi_opf |= REQ_NOUNMAP;
|
||||
|
||||
bio->bi_iter.bi_size = len << SECTOR_SHIFT;
|
||||
*biop = bio_chain_and_submit(*biop, bio);
|
||||
|
||||
nr_sects -= len;
|
||||
sector += len;
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
*biop = bio;
|
||||
return 0;
|
||||
static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
|
||||
sector_t nr_sects, gfp_t gfp, unsigned flags)
|
||||
{
|
||||
struct bio *bio = NULL;
|
||||
struct blk_plug plug;
|
||||
int ret = 0;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
__blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags);
|
||||
if (bio) {
|
||||
if ((flags & BLKDEV_ZERO_KILLABLE) &&
|
||||
fatal_signal_pending(current)) {
|
||||
bio_await_chain(bio);
|
||||
blk_finish_plug(&plug);
|
||||
return -EINTR;
|
||||
}
|
||||
ret = submit_bio_wait(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
/*
|
||||
* For some devices there is no non-destructive way to verify whether
|
||||
* WRITE ZEROES is actually supported. These will clear the capability
|
||||
* on an I/O error, in which case we'll turn any error into
|
||||
* "not supported" here.
|
||||
*/
|
||||
if (ret && !bdev_write_zeroes_sectors(bdev))
|
||||
return -EOPNOTSUPP;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
@@ -150,35 +183,63 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
	return min(pages, (sector_t)BIO_MAX_VECS);
}

static int __blkdev_issue_zero_pages(struct block_device *bdev,
static void __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
		struct bio **biop, unsigned int flags)
{
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;
	while (nr_sects) {
		unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
		struct bio *bio;

	if (bdev_read_only(bdev))
		return -EPERM;

	while (nr_sects != 0) {
		bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
				   REQ_OP_WRITE, gfp_mask);
		bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
		bio->bi_iter.bi_sector = sector;

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current))
				break;
		}

		do {
			unsigned int len, added;

			len = min_t(sector_t,
				    PAGE_SIZE, nr_sects << SECTOR_SHIFT);
			added = bio_add_page(bio, ZERO_PAGE(0), len, 0);
			if (added < len)
				break;
			nr_sects -= added >> SECTOR_SHIFT;
			sector += added >> SECTOR_SHIFT;
		} while (nr_sects);

		*biop = bio_chain_and_submit(*biop, bio);
		cond_resched();
	}
}

	*biop = bio;
	return 0;
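The fallback above fills bios from ZERO_PAGE(0) one page at a time and chains them with bio_chain_and_submit(). A rough userspace analogue of that "manually writing zeroes" path is a plain pwrite() loop from a zeroed buffer; the helper below is only an illustration of the idea, not kernel code.

/*
 * Rough userspace analogue of the zero-page fallback: write zeroes to a
 * byte range of an already-open block device fd, one buffer at a time.
 */
#include <unistd.h>

static int zero_range_fallback(int fd, off_t offset, off_t len)
{
	static const char zbuf[4096];	/* zero-initialized */

	while (len > 0) {
		size_t chunk = len < (off_t)sizeof(zbuf) ?
				(size_t)len : sizeof(zbuf);
		ssize_t ret = pwrite(fd, zbuf, chunk, offset);

		if (ret < 0)
			return -1;	/* caller inspects errno */
		offset += ret;
		len -= ret;
	}
	return 0;
}
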
static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp, unsigned flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret = 0;

	if (flags & BLKDEV_ZERO_NOFALLBACK)
		return -EOPNOTSUPP;

	blk_start_plug(&plug);
	__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
	if (bio) {
		if ((flags & BLKDEV_ZERO_KILLABLE) &&
		    fatal_signal_pending(current)) {
			bio_await_chain(bio);
			blk_finish_plug(&plug);
			return -EINTR;
		}
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}

/**
@@ -204,20 +265,19 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;
	if (bdev_read_only(bdev))
		return -EPERM;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
	if (bdev_write_zeroes_sectors(bdev)) {
		__blkdev_issue_write_zeroes(bdev, sector, nr_sects,
				gfp_mask, biop, flags);
	} else {
		if (flags & BLKDEV_ZERO_NOFALLBACK)
			return -EOPNOTSUPP;
		__blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
				biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
			biop);
	}
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

@@ -237,52 +297,22 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
	int ret;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
	if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
		return -EINVAL;
	if (bdev_read_only(bdev))
		return -EPERM;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
				gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
				gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	if (bdev_write_zeroes_sectors(bdev)) {
		ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
				gfp_mask, flags);
		if (ret != -EOPNOTSUPP)
			return ret;
	}

	return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);

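blkdev_issue_zeroout() now tries the WRITE ZEROES offload first and falls back to zero pages unless BLKDEV_ZERO_NOFALLBACK is set; it is also what the BLKZEROOUT ioctl mentioned in the merge log ends up calling. A minimal userspace sketch of issuing that ioctl (the device name is a placeholder):

/*
 * Illustrative use of BLKZEROOUT, which lands in blkdev_issue_zeroout().
 * Offsets and lengths must be multiples of the logical block size.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>		/* BLKZEROOUT */

int main(void)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* zero the first 1 MiB */
	int fd = open("/dev/sdX", O_WRONLY);	/* placeholder device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKZEROOUT, &range) < 0) {
		perror("BLKZEROOUT");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
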
int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
|
||||
|
@ -634,7 +634,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
|
||||
const struct iov_iter *iter, gfp_t gfp_mask)
|
||||
{
|
||||
bool copy = false, map_bvec = false;
|
||||
unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
|
||||
unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
|
||||
struct bio *bio = NULL;
|
||||
struct iov_iter i;
|
||||
int ret = -EINVAL;
|
||||
|
@@ -154,6 +154,19 @@ static struct bio *bio_split_write_zeroes(struct bio *bio,
	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
						bool is_atomic)
{
	/*
	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
	 * both non-zero.
	 */
	if (is_atomic && lim->atomic_write_boundary_sectors)
		return lim->atomic_write_boundary_sectors;

	return lim->chunk_sectors;
}

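blk_boundary_sectors() picks the atomic-write hardware boundary for atomic bios and chunk_sectors otherwise; callers then ask how many sectors remain before the next boundary. blk_boundary_sectors_left() itself is not shown in this hunk, so the helper below is only the generic modulo form of that calculation:

/*
 * Generic "sectors left until the next boundary" arithmetic.  For
 * example, sector 1000 with a 256-sector boundary leaves 24 sectors
 * before the boundary at 1024.
 */
static inline unsigned int boundary_sectors_left(unsigned long long sector,
						 unsigned int boundary)
{
	return boundary - (unsigned int)(sector % boundary);
}
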
/*
|
||||
* Return the maximum number of sectors from the start of a bio that may be
|
||||
* submitted as a single request to a block device. If enough sectors remain,
|
||||
@ -167,12 +180,23 @@ static inline unsigned get_max_io_size(struct bio *bio,
|
||||
{
|
||||
unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
|
||||
unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
|
||||
unsigned max_sectors = lim->max_sectors, start, end;
|
||||
bool is_atomic = bio->bi_opf & REQ_ATOMIC;
|
||||
unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
|
||||
unsigned max_sectors, start, end;
|
||||
|
||||
if (lim->chunk_sectors) {
|
||||
/*
|
||||
* We ignore lim->max_sectors for atomic writes because it may less
|
||||
* than the actual bio size, which we cannot tolerate.
|
||||
*/
|
||||
if (is_atomic)
|
||||
max_sectors = lim->atomic_write_max_sectors;
|
||||
else
|
||||
max_sectors = lim->max_sectors;
|
||||
|
||||
if (boundary_sectors) {
|
||||
max_sectors = min(max_sectors,
|
||||
blk_chunk_sectors_left(bio->bi_iter.bi_sector,
|
||||
lim->chunk_sectors));
|
||||
blk_boundary_sectors_left(bio->bi_iter.bi_sector,
|
||||
boundary_sectors));
|
||||
}
|
||||
|
||||
start = bio->bi_iter.bi_sector & (pbs - 1);
|
||||
@@ -185,23 +209,22 @@ static inline unsigned get_max_io_size(struct bio *bio,
/**
 * get_max_segment_size() - maximum number of bytes to add as a single segment
 * @lim: Request queue limits.
 * @start_page: See below.
 * @offset: Offset from @start_page where to add a segment.
 * @paddr: address of the range to add
 * @len: maximum length available to add at @paddr
 *
 * Returns the maximum number of bytes that can be added as a single segment.
 * Returns the maximum number of bytes of the range starting at @paddr that can
 * be added to a single segment.
 */
static inline unsigned get_max_segment_size(const struct queue_limits *lim,
		struct page *start_page, unsigned long offset)
		phys_addr_t paddr, unsigned int len)
{
	unsigned long mask = lim->seg_boundary_mask;

	offset = mask & (page_to_phys(start_page) + offset);

	/*
	 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
	 * after having calculated the minimum.
	 */
	return min(mask - offset, (unsigned long)lim->max_segment_size - 1) + 1;
	return min_t(unsigned long, len,
		min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
		    (unsigned long)lim->max_segment_size - 1) + 1);
}

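The rewritten get_max_segment_size() clamps a physical range against both the segment boundary mask and max_segment_size, subtracting 1 before the min() and adding it back afterwards so a mask of ULONG_MAX cannot overflow. A standalone sketch of the same arithmetic with example numbers:

/* Standalone sketch of the segment clamping, with sample values. */
#include <stdio.h>

static unsigned long max_segment_bytes(unsigned long seg_boundary_mask,
				       unsigned long max_segment_size,
				       unsigned long paddr, unsigned long len)
{
	unsigned long to_boundary = seg_boundary_mask -
				    (seg_boundary_mask & paddr);
	unsigned long cap = (to_boundary < max_segment_size - 1 ?
			     to_boundary : max_segment_size - 1) + 1;

	return len < cap ? len : cap;
}

int main(void)
{
	/* 64K boundary, 64K max segment, range starting 4K below a boundary */
	printf("%lu\n", max_segment_bytes(0xffff, 0x10000, 0x1f000, 0x20000));
	/* prints 4096: the segment must stop at the 64K boundary */
	return 0;
}
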
/**
|
||||
@ -234,9 +257,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
|
||||
unsigned seg_size = 0;
|
||||
|
||||
while (len && *nsegs < max_segs) {
|
||||
seg_size = get_max_segment_size(lim, bv->bv_page,
|
||||
bv->bv_offset + total_len);
|
||||
seg_size = min(seg_size, len);
|
||||
seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
|
||||
|
||||
(*nsegs)++;
|
||||
total_len += seg_size;
|
||||
@ -305,6 +326,11 @@ struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
|
||||
*segs = nsegs;
|
||||
return NULL;
|
||||
split:
|
||||
if (bio->bi_opf & REQ_ATOMIC) {
|
||||
bio->bi_status = BLK_STS_INVAL;
|
||||
bio_endio(bio);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
/*
|
||||
* We can't sanely support splitting for a REQ_NOWAIT bio. End it
|
||||
* with EAGAIN if splitting is required and return an error pointer.
|
||||
@ -465,8 +491,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
|
||||
|
||||
while (nbytes > 0) {
|
||||
unsigned offset = bvec->bv_offset + total;
|
||||
unsigned len = min(get_max_segment_size(&q->limits,
|
||||
bvec->bv_page, offset), nbytes);
|
||||
unsigned len = get_max_segment_size(&q->limits,
|
||||
bvec_phys(bvec) + total, nbytes);
|
||||
struct page *page = bvec->bv_page;
|
||||
|
||||
/*
|
||||
@ -588,18 +614,22 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
|
||||
sector_t offset)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned int max_sectors;
|
||||
struct queue_limits *lim = &q->limits;
|
||||
unsigned int max_sectors, boundary_sectors;
|
||||
bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
|
||||
|
||||
if (blk_rq_is_passthrough(rq))
|
||||
return q->limits.max_hw_sectors;
|
||||
|
||||
max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
|
||||
if (!q->limits.chunk_sectors ||
|
||||
boundary_sectors = blk_boundary_sectors(lim, is_atomic);
|
||||
max_sectors = blk_queue_get_max_sectors(rq);
|
||||
|
||||
if (!boundary_sectors ||
|
||||
req_op(rq) == REQ_OP_DISCARD ||
|
||||
req_op(rq) == REQ_OP_SECURE_ERASE)
|
||||
return max_sectors;
|
||||
return min(max_sectors,
|
||||
blk_chunk_sectors_left(offset, q->limits.chunk_sectors));
|
||||
blk_boundary_sectors_left(offset, boundary_sectors));
|
||||
}
|
||||
|
||||
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
|
||||
@ -797,6 +827,18 @@ static enum elv_merge blk_try_req_merge(struct request *req,
|
||||
return ELEVATOR_NO_MERGE;
|
||||
}
|
||||
|
||||
static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
|
||||
}
|
||||
|
||||
static bool blk_atomic_write_mergeable_rqs(struct request *rq,
|
||||
struct request *next)
|
||||
{
|
||||
return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
|
||||
}
|
||||
|
||||
/*
|
||||
* For non-mq, this has to be called with the request spinlock acquired.
|
||||
* For mq with scheduling, the appropriate queue wide lock should be held.
|
||||
@ -820,6 +862,9 @@ static struct request *attempt_merge(struct request_queue *q,
|
||||
if (req->ioprio != next->ioprio)
|
||||
return NULL;
|
||||
|
||||
if (!blk_atomic_write_mergeable_rqs(req, next))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* If we are allowed to merge, then append bio list
|
||||
* from next to rq and release next. merge_requests_fn
|
||||
@ -951,6 +996,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
|
||||
if (rq->ioprio != bio_prio(bio))
|
||||
return false;
|
||||
|
||||
if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -84,28 +84,15 @@ static const char *const blk_queue_flag_name[] = {
|
||||
QUEUE_FLAG_NAME(NOMERGES),
|
||||
QUEUE_FLAG_NAME(SAME_COMP),
|
||||
QUEUE_FLAG_NAME(FAIL_IO),
|
||||
QUEUE_FLAG_NAME(NONROT),
|
||||
QUEUE_FLAG_NAME(IO_STAT),
|
||||
QUEUE_FLAG_NAME(NOXMERGES),
|
||||
QUEUE_FLAG_NAME(ADD_RANDOM),
|
||||
QUEUE_FLAG_NAME(SYNCHRONOUS),
|
||||
QUEUE_FLAG_NAME(SAME_FORCE),
|
||||
QUEUE_FLAG_NAME(INIT_DONE),
|
||||
QUEUE_FLAG_NAME(STABLE_WRITES),
|
||||
QUEUE_FLAG_NAME(POLL),
|
||||
QUEUE_FLAG_NAME(WC),
|
||||
QUEUE_FLAG_NAME(FUA),
|
||||
QUEUE_FLAG_NAME(DAX),
|
||||
QUEUE_FLAG_NAME(STATS),
|
||||
QUEUE_FLAG_NAME(REGISTERED),
|
||||
QUEUE_FLAG_NAME(QUIESCED),
|
||||
QUEUE_FLAG_NAME(PCI_P2PDMA),
|
||||
QUEUE_FLAG_NAME(ZONE_RESETALL),
|
||||
QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
|
||||
QUEUE_FLAG_NAME(HCTX_ACTIVE),
|
||||
QUEUE_FLAG_NAME(NOWAIT),
|
||||
QUEUE_FLAG_NAME(SQ_SCHED),
|
||||
QUEUE_FLAG_NAME(SKIP_TAGSET_QUIESCE),
|
||||
};
|
||||
#undef QUEUE_FLAG_NAME
|
||||
|
||||
|
@ -448,6 +448,10 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
if (data->cmd_flags & REQ_NOWAIT)
|
||||
data->flags |= BLK_MQ_REQ_NOWAIT;
|
||||
|
||||
retry:
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
|
||||
|
||||
if (q->elevator) {
|
||||
/*
|
||||
* All requests use scheduler tags when an I/O scheduler is
|
||||
@ -469,13 +473,9 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
|
||||
if (ops->limit_depth)
|
||||
ops->limit_depth(data->cmd_flags, data);
|
||||
}
|
||||
}
|
||||
|
||||
retry:
|
||||
data->ctx = blk_mq_get_ctx(q);
|
||||
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
|
||||
if (!(data->rq_flags & RQF_SCHED_TAGS))
|
||||
} else {
|
||||
blk_mq_tag_busy(data->hctx);
|
||||
}
|
||||
|
||||
if (data->flags & BLK_MQ_REQ_RESERVED)
|
||||
data->rq_flags |= RQF_RESV;
|
||||
@ -804,10 +804,8 @@ static void blk_complete_request(struct request *req)
|
||||
if (!bio)
|
||||
return;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ)
|
||||
req->q->integrity.profile->complete_fn(req, total_bytes);
|
||||
#endif
|
||||
blk_integrity_complete(req, total_bytes);
|
||||
|
||||
/*
|
||||
* Upper layers may call blk_crypto_evict_key() anytime after the last
|
||||
@ -875,11 +873,9 @@ bool blk_update_request(struct request *req, blk_status_t error,
|
||||
if (!req->bio)
|
||||
return false;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
|
||||
error == BLK_STS_OK)
|
||||
req->q->integrity.profile->complete_fn(req, nr_bytes);
|
||||
#endif
|
||||
blk_integrity_complete(req, nr_bytes);
|
||||
|
||||
/*
|
||||
* Upper layers may call blk_crypto_evict_key() anytime after the last
|
||||
@ -1264,10 +1260,9 @@ void blk_mq_start_request(struct request *rq)
|
||||
WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT);
|
||||
rq->mq_hctx->tags->rqs[rq->tag] = rq;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INTEGRITY
|
||||
if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
|
||||
q->integrity.profile->prepare_fn(rq);
|
||||
#endif
|
||||
blk_integrity_prepare(rq);
|
||||
|
||||
if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
|
||||
WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num);
|
||||
}
|
||||
@ -2914,6 +2909,17 @@ static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
|
||||
INIT_LIST_HEAD(&rq->queuelist);
|
||||
}
|
||||
|
||||
static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
{
	unsigned int bs_mask = queue_logical_block_size(q) - 1;

	/* .bi_sector of any zero sized bio need to be initialized */
	if ((bio->bi_iter.bi_size & bs_mask) ||
	    ((bio->bi_iter.bi_sector << SECTOR_SHIFT) & bs_mask))
		return true;
	return false;
}

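bio_unaligned() relies on the logical block size being a power of two, so both the byte count and the byte offset can be checked with a single mask. A minimal standalone restatement:

/* Alignment test as used above; sector is in 512-byte units. */
#include <stdbool.h>
#include <stdint.h>

static bool range_unaligned(uint64_t sector, uint32_t nbytes,
			    uint32_t logical_block_size)
{
	uint32_t bs_mask = logical_block_size - 1;

	return (nbytes & bs_mask) || ((sector << 9) & bs_mask);
}
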
/**
|
||||
* blk_mq_submit_bio - Create and send a request to block device.
|
||||
* @bio: Bio pointer.
|
||||
@ -2966,6 +2972,15 @@ void blk_mq_submit_bio(struct bio *bio)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Device reconfiguration may change logical block size, so alignment
|
||||
* check has to be done with queue usage counter held
|
||||
*/
|
||||
if (unlikely(bio_unaligned(bio, q))) {
|
||||
bio_io_error(bio);
|
||||
goto queue_exit;
|
||||
}
|
||||
|
||||
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
|
||||
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
|
||||
if (!bio)
|
||||
@ -3041,7 +3056,7 @@ queue_exit:
|
||||
blk_status_t blk_insert_cloned_request(struct request *rq)
|
||||
{
|
||||
struct request_queue *q = rq->q;
|
||||
unsigned int max_sectors = blk_queue_get_max_sectors(q, req_op(rq));
|
||||
unsigned int max_sectors = blk_queue_get_max_sectors(rq);
|
||||
unsigned int max_segments = blk_rq_get_max_segments(rq);
|
||||
blk_status_t ret;
|
||||
|
||||
@ -4114,6 +4129,12 @@ void blk_mq_release(struct request_queue *q)
|
||||
blk_mq_sysfs_deinit(q);
|
||||
}
|
||||
|
||||
static bool blk_mq_can_poll(struct blk_mq_tag_set *set)
|
||||
{
|
||||
return set->nr_maps > HCTX_TYPE_POLL &&
|
||||
set->map[HCTX_TYPE_POLL].nr_queues;
|
||||
}
|
||||
|
||||
struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
|
||||
struct queue_limits *lim, void *queuedata)
|
||||
{
|
||||
@ -4121,7 +4142,13 @@ struct request_queue *blk_mq_alloc_queue(struct blk_mq_tag_set *set,
|
||||
struct request_queue *q;
|
||||
int ret;
|
||||
|
||||
q = blk_alloc_queue(lim ? lim : &default_lim, set->numa_node);
|
||||
if (!lim)
|
||||
lim = &default_lim;
|
||||
lim->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
|
||||
if (blk_mq_can_poll(set))
|
||||
lim->features |= BLK_FEAT_POLL;
|
||||
|
||||
q = blk_alloc_queue(lim, set->numa_node);
|
||||
if (IS_ERR(q))
|
||||
return q;
|
||||
q->queuedata = queuedata;
|
||||
@ -4274,17 +4301,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
}
|
||||
|
||||
static void blk_mq_update_poll_flag(struct request_queue *q)
|
||||
{
|
||||
struct blk_mq_tag_set *set = q->tag_set;
|
||||
|
||||
if (set->nr_maps > HCTX_TYPE_POLL &&
|
||||
set->map[HCTX_TYPE_POLL].nr_queues)
|
||||
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
|
||||
}
|
||||
|
||||
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
struct request_queue *q)
|
||||
{
|
||||
@ -4312,7 +4328,6 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
|
||||
q->tag_set = set;
|
||||
|
||||
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
|
||||
blk_mq_update_poll_flag(q);
|
||||
|
||||
INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
|
||||
INIT_LIST_HEAD(&q->flush_list);
|
||||
@ -4636,13 +4651,15 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||
int ret;
|
||||
unsigned long i;
|
||||
|
||||
if (WARN_ON_ONCE(!q->mq_freeze_depth))
|
||||
return -EINVAL;
|
||||
|
||||
if (!set)
|
||||
return -EINVAL;
|
||||
|
||||
if (q->nr_requests == nr)
|
||||
return 0;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
blk_mq_quiesce_queue(q);
|
||||
|
||||
ret = 0;
|
||||
@ -4676,7 +4693,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
|
||||
}
|
||||
|
||||
blk_mq_unquiesce_queue(q);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -4798,8 +4814,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
|
||||
fallback:
|
||||
blk_mq_update_queue_map(set);
|
||||
list_for_each_entry(q, &set->tag_list, tag_set_list) {
|
||||
struct queue_limits lim;
|
||||
|
||||
blk_mq_realloc_hw_ctxs(set, q);
|
||||
blk_mq_update_poll_flag(q);
|
||||
|
||||
if (q->nr_hw_queues != set->nr_hw_queues) {
|
||||
int i = prev_nr_hw_queues;
|
||||
|
||||
@ -4811,6 +4829,13 @@ fallback:
|
||||
set->nr_hw_queues = prev_nr_hw_queues;
|
||||
goto fallback;
|
||||
}
|
||||
lim = queue_limits_start_update(q);
|
||||
if (blk_mq_can_poll(set))
|
||||
lim.features |= BLK_FEAT_POLL;
|
||||
else
|
||||
lim.features &= ~BLK_FEAT_POLL;
|
||||
if (queue_limits_commit_update(q, &lim) < 0)
|
||||
pr_warn("updating the poll flag failed\n");
|
||||
blk_mq_map_swqueue(q);
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-integrity.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/backing-dev-defs.h>
|
||||
#include <linux/gcd.h>
|
||||
@ -55,7 +55,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_stacking_limits);
|
||||
|
||||
static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
/*
|
||||
@ -68,7 +68,7 @@ static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
|
||||
|
||||
static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
{
|
||||
if (!lim->zoned) {
|
||||
if (!(lim->features & BLK_FEAT_ZONED)) {
|
||||
if (WARN_ON_ONCE(lim->max_open_zones) ||
|
||||
WARN_ON_ONCE(lim->max_active_zones) ||
|
||||
WARN_ON_ONCE(lim->zone_write_granularity) ||
|
||||
@ -80,6 +80,14 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Given that active zones include open zones, the maximum number of
|
||||
* open zones cannot be larger than the maximum number of active zones.
|
||||
*/
|
||||
if (lim->max_active_zones &&
|
||||
lim->max_open_zones > lim->max_active_zones)
|
||||
return -EINVAL;
|
||||
|
||||
if (lim->zone_write_granularity < lim->logical_block_size)
|
||||
lim->zone_write_granularity = lim->logical_block_size;
|
||||
|
||||
@ -97,6 +105,120 @@ static int blk_validate_zoned_limits(struct queue_limits *lim)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int blk_validate_integrity_limits(struct queue_limits *lim)
|
||||
{
|
||||
struct blk_integrity *bi = &lim->integrity;
|
||||
|
||||
if (!bi->tuple_size) {
|
||||
if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE ||
|
||||
bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) {
|
||||
pr_warn("invalid PI settings.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
|
||||
pr_warn("integrity support disabled.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE &&
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG)) {
|
||||
pr_warn("ref tag not support without checksum.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!bi->interval_exp)
|
||||
bi->interval_exp = ilog2(lim->logical_block_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns max guaranteed bytes which we can fit in a bio.
|
||||
*
|
||||
* We request that an atomic_write is ITER_UBUF iov_iter (so a single vector),
|
||||
* so we assume that we can fit in at least PAGE_SIZE in a segment, apart from
|
||||
* the first and last segments.
|
||||
*/
|
||||
static unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_segments = min(BIO_MAX_VECS, lim->max_segments);
|
||||
unsigned int length;
|
||||
|
||||
length = min(max_segments, 2) * lim->logical_block_size;
|
||||
if (max_segments > 2)
|
||||
length += (max_segments - 2) * PAGE_SIZE;
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
static void blk_atomic_writes_update_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int unit_limit = min(lim->max_hw_sectors << SECTOR_SHIFT,
|
||||
blk_queue_max_guaranteed_bio(lim));
|
||||
|
||||
unit_limit = rounddown_pow_of_two(unit_limit);
|
||||
|
||||
lim->atomic_write_max_sectors =
|
||||
min(lim->atomic_write_hw_max >> SECTOR_SHIFT,
|
||||
lim->max_hw_sectors);
|
||||
lim->atomic_write_unit_min =
|
||||
min(lim->atomic_write_hw_unit_min, unit_limit);
|
||||
lim->atomic_write_unit_max =
|
||||
min(lim->atomic_write_hw_unit_max, unit_limit);
|
||||
lim->atomic_write_boundary_sectors =
|
||||
lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
|
||||
}
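blk_atomic_writes_update_limits() derives the atomic write unit limit from the guaranteed bio length (only the first and last vector are assumed to hold a single logical block) rounded down to a power of two. The sketch below reproduces that calculation under the assumption of a 4 KiB page; the helper names are made up for the example:

/* Illustrative recomputation of the guaranteed bio length and unit limit. */
#include <stdio.h>

static unsigned int rounddown_pow_of_two(unsigned int n)
{
	unsigned int p = 1;

	while (p * 2 != 0 && p * 2 <= n)
		p *= 2;
	return p;
}

static unsigned int guaranteed_bio_bytes(unsigned int max_segments,
					 unsigned int logical_block_size)
{
	unsigned int segs = max_segments < 2 ? max_segments : 2;
	unsigned int length = segs * logical_block_size;

	if (max_segments > 2)
		length += (max_segments - 2) * 4096;	/* PAGE_SIZE assumed */
	return length;
}

int main(void)
{
	unsigned int len = guaranteed_bio_bytes(256, 512);	/* 1041408 */

	printf("unit limit = %u\n", rounddown_pow_of_two(len));	/* 524288 */
	return 0;
}
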
|
||||
|
||||
static void blk_validate_atomic_write_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int boundary_sectors;
|
||||
|
||||
if (!lim->atomic_write_hw_max)
|
||||
goto unsupported;
|
||||
|
||||
boundary_sectors = lim->atomic_write_hw_boundary >> SECTOR_SHIFT;
|
||||
|
||||
if (boundary_sectors) {
|
||||
/*
|
||||
* A feature of boundary support is that it disallows bios to
|
||||
* be merged which would result in a merged request which
|
||||
* crosses either a chunk sector or atomic write HW boundary,
|
||||
* even though chunk sectors may be just set for performance.
|
||||
* For simplicity, disallow atomic writes for a chunk sector
|
||||
* which is non-zero and smaller than atomic write HW boundary.
|
||||
* Furthermore, chunk sectors must be a multiple of atomic
|
||||
* write HW boundary. Otherwise boundary support becomes
|
||||
* complicated.
|
||||
* Devices which do not conform to these rules can be dealt
|
||||
* with if and when they show up.
|
||||
*/
|
||||
if (WARN_ON_ONCE(lim->chunk_sectors % boundary_sectors))
|
||||
goto unsupported;
|
||||
|
||||
/*
|
||||
* The boundary size just needs to be a multiple of unit_max
|
||||
* (and not necessarily a power-of-2), so this following check
|
||||
* could be relaxed in future.
|
||||
* Furthermore, if needed, unit_max could even be reduced so
|
||||
* that it is compliant with a !power-of-2 boundary.
|
||||
*/
|
||||
if (!is_power_of_2(boundary_sectors))
|
||||
goto unsupported;
|
||||
}
|
||||
|
||||
blk_atomic_writes_update_limits(lim);
|
||||
return;
|
||||
|
||||
unsupported:
|
||||
lim->atomic_write_max_sectors = 0;
|
||||
lim->atomic_write_boundary_sectors = 0;
|
||||
lim->atomic_write_unit_min = 0;
|
||||
lim->atomic_write_unit_max = 0;
|
||||
}
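When a hardware atomic-write boundary is present, the validation above enforces two structural rules: chunk_sectors must be a multiple of the boundary, and the boundary must be a power of two. Restated as a compact predicate (illustrative only):

#include <stdbool.h>

static bool atomic_boundary_ok(unsigned int chunk_sectors,
			       unsigned int boundary_sectors)
{
	if (!boundary_sectors)
		return true;
	if (chunk_sectors % boundary_sectors)
		return false;
	return (boundary_sectors & (boundary_sectors - 1)) == 0;
}
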
|
||||
|
||||
/*
|
||||
* Check that the limits in lim are valid, initialize defaults for unset
|
||||
* values, and cap values based on others where needed.
|
||||
@ -105,6 +227,7 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
{
|
||||
unsigned int max_hw_sectors;
|
||||
unsigned int logical_block_sectors;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* Unless otherwise specified, default to 512 byte logical blocks and a
|
||||
@ -112,6 +235,10 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
*/
|
||||
if (!lim->logical_block_size)
|
||||
lim->logical_block_size = SECTOR_SIZE;
|
||||
else if (blk_validate_block_size(lim->logical_block_size)) {
|
||||
pr_warn("Invalid logical block size (%d)\n", lim->logical_block_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (lim->physical_block_size < lim->logical_block_size)
|
||||
lim->physical_block_size = lim->logical_block_size;
|
||||
|
||||
@ -153,6 +280,12 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
|
||||
return -EINVAL;
|
||||
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
|
||||
} else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
|
||||
lim->max_sectors =
|
||||
min(max_hw_sectors, lim->io_opt >> SECTOR_SHIFT);
|
||||
} else if (lim->io_min > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
|
||||
lim->max_sectors =
|
||||
min(max_hw_sectors, lim->io_min >> SECTOR_SHIFT);
|
||||
} else {
|
||||
lim->max_sectors = min(max_hw_sectors, BLK_DEF_MAX_SECTORS_CAP);
|
||||
}
|
||||
@ -220,9 +353,17 @@ static int blk_validate_limits(struct queue_limits *lim)
|
||||
|
||||
if (lim->alignment_offset) {
|
||||
lim->alignment_offset &= (lim->physical_block_size - 1);
|
||||
lim->misaligned = 0;
|
||||
lim->flags &= ~BLK_FLAG_MISALIGNED;
|
||||
}
|
||||
|
||||
if (!(lim->features & BLK_FEAT_WRITE_CACHE))
|
||||
lim->features &= ~BLK_FEAT_FUA;
|
||||
|
||||
blk_validate_atomic_write_limits(lim);
|
||||
|
||||
err = blk_validate_integrity_limits(lim);
|
||||
if (err)
|
||||
return err;
|
||||
return blk_validate_zoned_limits(lim);
|
||||
}
|
||||
|
||||
@ -254,15 +395,25 @@ int blk_set_default_limits(struct queue_limits *lim)
|
||||
*/
|
||||
int queue_limits_commit_update(struct request_queue *q,
|
||||
struct queue_limits *lim)
|
||||
__releases(q->limits_lock)
|
||||
{
|
||||
int error = blk_validate_limits(lim);
|
||||
int error;
|
||||
|
||||
error = blk_validate_limits(lim);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
|
||||
if (q->crypto_profile && lim->integrity.tag_size) {
|
||||
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n");
|
||||
error = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!error) {
|
||||
q->limits = *lim;
|
||||
if (q->disk)
|
||||
blk_apply_bdi_limits(q->disk->bdi, lim);
|
||||
}
|
||||
out_unlock:
|
||||
mutex_unlock(&q->limits_lock);
|
||||
return error;
|
||||
}
|
||||
@ -286,204 +437,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_set);
|
||||
|
||||
/**
|
||||
* blk_queue_chunk_sectors - set size of the chunk for this queue
|
||||
* @q: the request queue for the device
|
||||
* @chunk_sectors: chunk sectors in the usual 512b unit
|
||||
*
|
||||
* Description:
|
||||
* If a driver doesn't want IOs to cross a given chunk size, it can set
|
||||
* this limit and prevent merging across chunks. Note that the block layer
|
||||
* must accept a page worth of data at any offset. So if the crossing of
|
||||
* chunks is a hard limitation in the driver, it must still be prepared
|
||||
* to split single page bios.
|
||||
**/
|
||||
void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
|
||||
{
|
||||
q->limits.chunk_sectors = chunk_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_chunk_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_discard_sectors - set max sectors for a single discard
|
||||
* @q: the request queue for the device
|
||||
* @max_discard_sectors: maximum number of sectors to discard
|
||||
**/
|
||||
void blk_queue_max_discard_sectors(struct request_queue *q,
|
||||
unsigned int max_discard_sectors)
|
||||
{
|
||||
struct queue_limits *lim = &q->limits;
|
||||
|
||||
lim->max_hw_discard_sectors = max_discard_sectors;
|
||||
lim->max_discard_sectors =
|
||||
min(max_discard_sectors, lim->max_user_discard_sectors);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_secure_erase_sectors - set max sectors for a secure erase
|
||||
* @q: the request queue for the device
|
||||
* @max_sectors: maximum number of sectors to secure_erase
|
||||
**/
|
||||
void blk_queue_max_secure_erase_sectors(struct request_queue *q,
|
||||
unsigned int max_sectors)
|
||||
{
|
||||
q->limits.max_secure_erase_sectors = max_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_write_zeroes_sectors - set max sectors for a single
|
||||
* write zeroes
|
||||
* @q: the request queue for the device
|
||||
* @max_write_zeroes_sectors: maximum number of sectors to write per command
|
||||
**/
|
||||
void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
|
||||
unsigned int max_write_zeroes_sectors)
|
||||
{
|
||||
q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_max_zone_append_sectors - set max sectors for a single zone append
|
||||
* @q: the request queue for the device
|
||||
* @max_zone_append_sectors: maximum number of sectors to write per command
|
||||
*
|
||||
* Sets the maximum number of sectors allowed for zone append commands. If
|
||||
* Specifying 0 for @max_zone_append_sectors indicates that the queue does
|
||||
* not natively support zone append operations and that the block layer must
|
||||
* emulate these operations using regular writes.
|
||||
**/
|
||||
void blk_queue_max_zone_append_sectors(struct request_queue *q,
|
||||
unsigned int max_zone_append_sectors)
|
||||
{
|
||||
unsigned int max_sectors = 0;
|
||||
|
||||
if (WARN_ON(!blk_queue_is_zoned(q)))
|
||||
return;
|
||||
|
||||
if (max_zone_append_sectors) {
|
||||
max_sectors = min(q->limits.max_hw_sectors,
|
||||
max_zone_append_sectors);
|
||||
max_sectors = min(q->limits.chunk_sectors, max_sectors);
|
||||
|
||||
/*
|
||||
* Signal eventual driver bugs resulting in the max_zone_append
|
||||
* sectors limit being 0 due to the chunk_sectors limit (zone
|
||||
* size) not set or the max_hw_sectors limit not set.
|
||||
*/
|
||||
WARN_ON_ONCE(!max_sectors);
|
||||
}
|
||||
|
||||
q->limits.max_zone_append_sectors = max_sectors;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors);
|
||||
|
||||
/**
|
||||
* blk_queue_logical_block_size - set logical block size for the queue
|
||||
* @q: the request queue for the device
|
||||
* @size: the logical block size, in bytes
|
||||
*
|
||||
* Description:
|
||||
* This should be set to the lowest possible block size that the
|
||||
* storage device can address. The default of 512 covers most
|
||||
* hardware.
|
||||
**/
|
||||
void blk_queue_logical_block_size(struct request_queue *q, unsigned int size)
|
||||
{
|
||||
struct queue_limits *limits = &q->limits;
|
||||
|
||||
limits->logical_block_size = size;
|
||||
|
||||
if (limits->discard_granularity < limits->logical_block_size)
|
||||
limits->discard_granularity = limits->logical_block_size;
|
||||
|
||||
if (limits->physical_block_size < size)
|
||||
limits->physical_block_size = size;
|
||||
|
||||
if (limits->io_min < limits->physical_block_size)
|
||||
limits->io_min = limits->physical_block_size;
|
||||
|
||||
limits->max_hw_sectors =
|
||||
round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT);
|
||||
limits->max_sectors =
|
||||
round_down(limits->max_sectors, size >> SECTOR_SHIFT);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_logical_block_size);
|
||||
|
||||
/**
|
||||
* blk_queue_physical_block_size - set physical block size for the queue
|
||||
* @q: the request queue for the device
|
||||
* @size: the physical block size, in bytes
|
||||
*
|
||||
* Description:
|
||||
* This should be set to the lowest possible sector size that the
|
||||
* hardware can operate on without reverting to read-modify-write
|
||||
* operations.
|
||||
*/
|
||||
void blk_queue_physical_block_size(struct request_queue *q, unsigned int size)
|
||||
{
|
||||
q->limits.physical_block_size = size;
|
||||
|
||||
if (q->limits.physical_block_size < q->limits.logical_block_size)
|
||||
q->limits.physical_block_size = q->limits.logical_block_size;
|
||||
|
||||
if (q->limits.discard_granularity < q->limits.physical_block_size)
|
||||
q->limits.discard_granularity = q->limits.physical_block_size;
|
||||
|
||||
if (q->limits.io_min < q->limits.physical_block_size)
|
||||
q->limits.io_min = q->limits.physical_block_size;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_physical_block_size);
|
||||
|
||||
/**
|
||||
* blk_queue_zone_write_granularity - set zone write granularity for the queue
|
||||
* @q: the request queue for the zoned device
|
||||
* @size: the zone write granularity size, in bytes
|
||||
*
|
||||
* Description:
|
||||
* This should be set to the lowest possible size allowing to write in
|
||||
* sequential zones of a zoned block device.
|
||||
*/
|
||||
void blk_queue_zone_write_granularity(struct request_queue *q,
|
||||
unsigned int size)
|
||||
{
|
||||
if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
|
||||
return;
|
||||
|
||||
q->limits.zone_write_granularity = size;
|
||||
|
||||
if (q->limits.zone_write_granularity < q->limits.logical_block_size)
|
||||
q->limits.zone_write_granularity = q->limits.logical_block_size;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity);
|
||||
|
||||
/**
|
||||
* blk_queue_alignment_offset - set physical block alignment offset
|
||||
* @q: the request queue for the device
|
||||
* @offset: alignment offset in bytes
|
||||
*
|
||||
* Description:
|
||||
* Some devices are naturally misaligned to compensate for things like
|
||||
* the legacy DOS partition table 63-sector offset. Low-level drivers
|
||||
* should call this function for devices whose first sector is not
|
||||
* naturally aligned.
|
||||
*/
|
||||
void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
|
||||
{
|
||||
q->limits.alignment_offset =
|
||||
offset & (q->limits.physical_block_size - 1);
|
||||
q->limits.misaligned = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_alignment_offset);
|
||||
|
||||
void disk_update_readahead(struct gendisk *disk)
|
||||
{
|
||||
blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_update_readahead);
|
||||
|
||||
/**
|
||||
* blk_limits_io_min - set minimum request size for a device
|
||||
* @limits: the queue limits
|
||||
@ -507,26 +460,6 @@ void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_limits_io_min);
|
||||
|
||||
/**
|
||||
* blk_queue_io_min - set minimum request size for the queue
|
||||
* @q: the request queue for the device
|
||||
* @min: smallest I/O size in bytes
|
||||
*
|
||||
* Description:
|
||||
* Storage devices may report a granularity or preferred minimum I/O
|
||||
* size which is the smallest request the device can perform without
|
||||
* incurring a performance penalty. For disk drives this is often the
|
||||
* physical block size. For RAID arrays it is often the stripe chunk
|
||||
* size. A properly aligned multiple of minimum_io_size is the
|
||||
* preferred request size for workloads where a high number of I/O
|
||||
* operations is desired.
|
||||
*/
|
||||
void blk_queue_io_min(struct request_queue *q, unsigned int min)
|
||||
{
|
||||
blk_limits_io_min(&q->limits, min);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_io_min);
|
||||
|
||||
/**
|
||||
* blk_limits_io_opt - set optimal request size for a device
|
||||
* @limits: the queue limits
|
||||
@ -614,6 +547,21 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
{
|
||||
unsigned int top, bottom, alignment, ret = 0;
|
||||
|
||||
t->features |= (b->features & BLK_FEAT_INHERIT_MASK);
|
||||
|
||||
/*
|
||||
* BLK_FEAT_NOWAIT and BLK_FEAT_POLL need to be supported both by the
|
||||
* stacking driver and all underlying devices. The stacking driver sets
|
||||
* the flags before stacking the limits, and this will clear the flags
|
||||
* if any of the underlying devices does not support it.
|
||||
*/
|
||||
if (!(b->features & BLK_FEAT_NOWAIT))
|
||||
t->features &= ~BLK_FEAT_NOWAIT;
|
||||
if (!(b->features & BLK_FEAT_POLL))
|
||||
t->features &= ~BLK_FEAT_POLL;
|
||||
|
||||
t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
|
||||
|
||||
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
|
||||
t->max_user_sectors = min_not_zero(t->max_user_sectors,
|
||||
b->max_user_sectors);
|
||||
@ -623,7 +571,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
b->max_write_zeroes_sectors);
|
||||
t->max_zone_append_sectors = min(queue_limits_max_zone_append_sectors(t),
|
||||
queue_limits_max_zone_append_sectors(b));
|
||||
t->bounce = max(t->bounce, b->bounce);
|
||||
|
||||
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
|
||||
b->seg_boundary_mask);
|
||||
@ -639,8 +586,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
t->max_segment_size = min_not_zero(t->max_segment_size,
|
||||
b->max_segment_size);
|
||||
|
||||
t->misaligned |= b->misaligned;
|
||||
|
||||
alignment = queue_limit_alignment_offset(b, start);
|
||||
|
||||
/* Bottom device has different alignment. Check that it is
|
||||
@ -654,7 +599,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
|
||||
/* Verify that top and bottom intervals line up */
|
||||
if (max(top, bottom) % min(top, bottom)) {
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
}
|
||||
@ -676,42 +621,38 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
/* Physical block size a multiple of the logical block size? */
|
||||
if (t->physical_block_size & (t->logical_block_size - 1)) {
|
||||
t->physical_block_size = t->logical_block_size;
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
/* Minimum I/O a multiple of the physical block size? */
|
||||
if (t->io_min & (t->physical_block_size - 1)) {
|
||||
t->io_min = t->physical_block_size;
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
/* Optimal I/O a multiple of the physical block size? */
|
||||
if (t->io_opt & (t->physical_block_size - 1)) {
|
||||
t->io_opt = 0;
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
/* chunk_sectors a multiple of the physical block size? */
|
||||
if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
|
||||
t->chunk_sectors = 0;
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
t->raid_partial_stripes_expensive =
|
||||
max(t->raid_partial_stripes_expensive,
|
||||
b->raid_partial_stripes_expensive);
|
||||
|
||||
/* Find lowest common alignment_offset */
|
||||
t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment)
|
||||
% max(t->physical_block_size, t->io_min);
|
||||
|
||||
/* Verify that new alignment_offset is on a logical block boundary */
|
||||
if (t->alignment_offset & (t->logical_block_size - 1)) {
|
||||
t->misaligned = 1;
|
||||
t->flags |= BLK_FLAG_MISALIGNED;
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
@ -723,16 +664,6 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
if (b->discard_granularity) {
|
||||
alignment = queue_limit_discard_alignment(b, start);
|
||||
|
||||
if (t->discard_granularity != 0 &&
|
||||
t->discard_alignment != alignment) {
|
||||
top = t->discard_granularity + t->discard_alignment;
|
||||
bottom = b->discard_granularity + alignment;
|
||||
|
||||
/* Verify that top and bottom intervals line up */
|
||||
if ((max(top, bottom) % min(top, bottom)) != 0)
|
||||
t->discard_misaligned = 1;
|
||||
}
|
||||
|
||||
t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
|
||||
b->max_discard_sectors);
|
||||
t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
|
||||
@ -746,8 +677,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
|
||||
b->max_secure_erase_sectors);
|
||||
t->zone_write_granularity = max(t->zone_write_granularity,
|
||||
b->zone_write_granularity);
|
||||
t->zoned = max(t->zoned, b->zoned);
|
||||
if (!t->zoned) {
|
||||
if (!(t->features & BLK_FEAT_ZONED)) {
|
||||
t->zone_write_granularity = 0;
|
||||
t->max_zone_append_sectors = 0;
|
||||
}
|
||||
@ -781,21 +711,65 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
|
||||
EXPORT_SYMBOL_GPL(queue_limits_stack_bdev);
|
||||
|
||||
/**
|
||||
* blk_queue_update_dma_pad - update pad mask
|
||||
* @q: the request queue for the device
|
||||
* @mask: pad mask
|
||||
* queue_limits_stack_integrity - stack integrity profile
|
||||
* @t: target queue limits
|
||||
* @b: base queue limits
|
||||
*
|
||||
* Update dma pad mask.
|
||||
* Check if the integrity profile in the @b can be stacked into the
|
||||
* target @t. Stacking is possible if either:
|
||||
*
|
||||
* Appending pad buffer to a request modifies the last entry of a
|
||||
* scatter list such that it includes the pad buffer.
|
||||
**/
|
||||
void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
|
||||
* a) does not have any integrity information stacked into it yet
|
||||
* b) the integrity profile in @b is identical to the one in @t
|
||||
*
|
||||
* If @b can be stacked into @t, return %true. Else return %false and clear the
|
||||
* integrity information in @t.
|
||||
*/
|
||||
bool queue_limits_stack_integrity(struct queue_limits *t,
|
||||
struct queue_limits *b)
|
||||
{
|
||||
if (mask > q->dma_pad_mask)
|
||||
q->dma_pad_mask = mask;
|
||||
struct blk_integrity *ti = &t->integrity;
|
||||
struct blk_integrity *bi = &b->integrity;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
|
||||
return true;
|
||||
|
||||
if (!ti->tuple_size) {
|
||||
/* inherit the settings from the first underlying device */
|
||||
if (!(ti->flags & BLK_INTEGRITY_STACKED)) {
|
||||
ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE |
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG);
|
||||
ti->csum_type = bi->csum_type;
|
||||
ti->tuple_size = bi->tuple_size;
|
||||
ti->pi_offset = bi->pi_offset;
|
||||
ti->interval_exp = bi->interval_exp;
|
||||
ti->tag_size = bi->tag_size;
|
||||
goto done;
|
||||
}
|
||||
EXPORT_SYMBOL(blk_queue_update_dma_pad);
|
||||
if (!bi->tuple_size)
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (ti->tuple_size != bi->tuple_size)
|
||||
goto incompatible;
|
||||
if (ti->interval_exp != bi->interval_exp)
|
||||
goto incompatible;
|
||||
if (ti->tag_size != bi->tag_size)
|
||||
goto incompatible;
|
||||
if (ti->csum_type != bi->csum_type)
|
||||
goto incompatible;
|
||||
if ((ti->flags & BLK_INTEGRITY_REF_TAG) !=
|
||||
(bi->flags & BLK_INTEGRITY_REF_TAG))
|
||||
goto incompatible;
|
||||
|
||||
done:
|
||||
ti->flags |= BLK_INTEGRITY_STACKED;
|
||||
return true;
|
||||
|
||||
incompatible:
|
||||
memset(ti, 0, sizeof(*ti));
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(queue_limits_stack_integrity);
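Two integrity profiles stack only when the compared fields are identical; otherwise the target profile is cleared. The predicate below mirrors that comparison using a local stand-in struct rather than the kernel's blk_integrity definition:

#include <stdbool.h>

struct pi_profile {		/* stand-in with just the compared fields */
	unsigned char	tuple_size;
	unsigned char	interval_exp;
	unsigned char	tag_size;
	unsigned char	csum_type;
	bool		ref_tag;
};

static bool pi_profiles_compatible(const struct pi_profile *t,
				   const struct pi_profile *b)
{
	return t->tuple_size == b->tuple_size &&
	       t->interval_exp == b->interval_exp &&
	       t->tag_size == b->tag_size &&
	       t->csum_type == b->csum_type &&
	       t->ref_tag == b->ref_tag;
}
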
|
||||
|
||||
/**
|
||||
* blk_set_queue_depth - tell the block layer about the device queue depth
|
||||
@ -810,54 +784,11 @@ void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_queue_depth);
|
||||
|
||||
/**
|
||||
* blk_queue_write_cache - configure queue's write cache
|
||||
* @q: the request queue for the device
|
||||
* @wc: write back cache on or off
|
||||
* @fua: device supports FUA writes, if true
|
||||
*
|
||||
* Tell the block layer about the write cache of @q.
|
||||
*/
|
||||
void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
|
||||
{
|
||||
if (wc) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_HW_WC, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_WC, q);
|
||||
} else {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_HW_WC, q);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
|
||||
}
|
||||
if (fua)
|
||||
blk_queue_flag_set(QUEUE_FLAG_FUA, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_FUA, q);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
|
||||
|
||||
/**
|
||||
* disk_set_zoned - inidicate a zoned device
|
||||
* @disk: gendisk to configure
|
||||
*/
|
||||
void disk_set_zoned(struct gendisk *disk)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED));
|
||||
|
||||
/*
|
||||
* Set the zone write granularity to the device logical block
|
||||
* size by default. The driver can change this value if needed.
|
||||
*/
|
||||
q->limits.zoned = true;
|
||||
blk_queue_zone_write_granularity(q, queue_logical_block_size(q));
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(disk_set_zoned);
|
||||
|
||||
int bdev_alignment_offset(struct block_device *bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (q->limits.misaligned)
|
||||
if (q->limits.flags & BLK_FLAG_MISALIGNED)
|
||||
return -1;
|
||||
if (bdev_is_partition(bdev))
|
||||
return queue_limit_alignment_offset(&q->limits,
|
||||
|
@ -22,8 +22,8 @@
|
||||
|
||||
struct queue_sysfs_entry {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct request_queue *, char *);
|
||||
ssize_t (*store)(struct request_queue *, const char *, size_t);
|
||||
ssize_t (*show)(struct gendisk *disk, char *page);
|
||||
ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
@ -47,18 +47,18 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t queue_requests_show(struct request_queue *q, char *page)
|
||||
static ssize_t queue_requests_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return queue_var_show(q->nr_requests, page);
|
||||
return queue_var_show(disk->queue->nr_requests, page);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
||||
queue_requests_store(struct gendisk *disk, const char *page, size_t count)
|
||||
{
|
||||
unsigned long nr;
|
||||
int ret, err;
|
||||
|
||||
if (!queue_is_mq(q))
|
||||
if (!queue_is_mq(disk->queue))
|
||||
return -EINVAL;
|
||||
|
||||
ret = queue_var_store(&nr, page, count);
|
||||
@ -68,110 +68,90 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
|
||||
if (nr < BLKDEV_MIN_RQ)
|
||||
nr = BLKDEV_MIN_RQ;
|
||||
|
||||
err = blk_mq_update_nr_requests(q, nr);
|
||||
err = blk_mq_update_nr_requests(disk->queue, nr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_ra_show(struct request_queue *q, char *page)
|
||||
static ssize_t queue_ra_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
unsigned long ra_kb;
|
||||
|
||||
if (!q->disk)
|
||||
return -EINVAL;
|
||||
ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
|
||||
return queue_var_show(ra_kb, page);
|
||||
return queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_ra_store(struct request_queue *q, const char *page, size_t count)
|
||||
queue_ra_store(struct gendisk *disk, const char *page, size_t count)
|
||||
{
|
||||
unsigned long ra_kb;
|
||||
ssize_t ret;
|
||||
|
||||
if (!q->disk)
|
||||
return -EINVAL;
|
||||
ret = queue_var_store(&ra_kb, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
q->disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
|
||||
disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
|
||||
{
|
||||
int max_sectors_kb = queue_max_sectors(q) >> 1;
|
||||
|
||||
return queue_var_show(max_sectors_kb, page);
|
||||
#define QUEUE_SYSFS_LIMIT_SHOW(_field) \
|
||||
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return queue_var_show(disk->queue->limits._field, page); \
|
||||
}
|
||||
|
||||
static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_segments(q), page);
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_segments)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(io_min)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(io_opt)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
|
||||
QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
|
||||
|
||||
#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \
|
||||
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%llu\n", \
|
||||
(unsigned long long)disk->queue->limits._field << \
|
||||
SECTOR_SHIFT); \
|
||||
}
|
||||
|
||||
static ssize_t queue_max_discard_segments_show(struct request_queue *q,
|
||||
char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_discard_segments(q), page);
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
|
||||
|
||||
#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
|
||||
static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return queue_var_show(disk->queue->limits._field >> 1, page); \
|
||||
}
|
||||
|
||||
static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.max_integrity_segments, page);
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
|
||||
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
|
||||
|
||||
#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%d\n", _val); \
|
||||
}
|
||||
|
||||
static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_max_segment_size(q), page);
|
||||
}
|
||||
/* deprecated fields */
|
||||
QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
|
||||
QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
|
||||
QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
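The new show helpers print queue_limits fields directly, either verbatim, in bytes, or in KiB depending on the macro. From userspace these remain the usual files under /sys/block/<disk>/queue; a small illustrative reader follows (the helper function is made up, the attribute names are the existing ABI):

#include <stdio.h>

static void show_queue_attr(const char *disk, const char *attr)
{
	char path[256], val[64];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/block/%s/queue/%s", disk, attr);
	f = fopen(path, "r");
	if (f && fgets(val, sizeof(val), f))
		printf("%-28s %s", attr, val);
	if (f)
		fclose(f);
}

int main(void)
{
	const char *disk = "sda";	/* adjust to a local disk */

	show_queue_attr(disk, "logical_block_size");
	show_queue_attr(disk, "max_sectors_kb");
	show_queue_attr(disk, "max_hw_sectors_kb");
	show_queue_attr(disk, "write_zeroes_max_bytes");
	return 0;
}
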
|
||||
|
||||
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_logical_block_size(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_physical_block_size(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.chunk_sectors, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_io_min_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_io_min(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_io_opt(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.discard_granularity, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
|
||||
{
|
||||
|
||||
return sprintf(page, "%llu\n",
|
||||
(unsigned long long)q->limits.max_hw_discard_sectors << 9);
|
||||
}
|
||||
|
||||
static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return sprintf(page, "%llu\n",
|
||||
(unsigned long long)q->limits.max_discard_sectors << 9);
|
||||
}
|
||||
|
||||
static ssize_t queue_discard_max_store(struct request_queue *q,
|
||||
static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
|
||||
const char *page, size_t count)
|
||||
{
|
||||
unsigned long max_discard_bytes;
|
||||
@ -183,54 +163,34 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (max_discard_bytes & (q->limits.discard_granularity - 1))
|
||||
if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
|
||||
return -EINVAL;
|
||||
|
||||
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
lim = queue_limits_start_update(q);
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
err = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(0, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(0, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
|
||||
/*
|
||||
* For zone append queue_max_zone_append_sectors does not just return the
|
||||
* underlying queue limits, but actually contains a calculation. Because of
|
||||
* that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here.
|
||||
*/
|
||||
static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
|
||||
{
|
||||
return sprintf(page, "%llu\n",
|
||||
(unsigned long long)q->limits.max_write_zeroes_sectors << 9);
|
||||
}
|
||||
|
||||
static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
|
||||
char *page)
|
||||
{
|
||||
return queue_var_show(queue_zone_write_granularity(q), page);
|
||||
}
|
||||
|
||||
static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
|
||||
{
|
||||
unsigned long long max_sectors = queue_max_zone_append_sectors(q);
|
||||
|
||||
return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
|
||||
(u64)queue_max_zone_append_sectors(disk->queue) <<
|
||||
SECTOR_SHIFT);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
||||
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
|
||||
{
|
||||
unsigned long max_sectors_kb;
|
||||
struct queue_limits lim;
|
||||
@ -241,94 +201,83 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
blk_mq_freeze_queue(q);
|
||||
lim = queue_limits_start_update(q);
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
lim.max_user_sectors = max_sectors_kb << 1;
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
blk_mq_unfreeze_queue(q);
|
||||
err = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (err)
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
|
||||
static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
|
||||
size_t count, blk_features_t feature)
|
||||
{
|
||||
int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
|
||||
struct queue_limits lim;
|
||||
unsigned long val;
|
||||
ssize_t ret;
|
||||
|
||||
return queue_var_show(max_hw_sectors_kb, page);
|
||||
ret = queue_var_store(&val, page, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
if (val)
|
||||
lim.features |= feature;
|
||||
else
|
||||
lim.features &= ~feature;
|
||||
ret = queue_limits_commit_update(disk->queue, &lim);
|
||||
if (ret)
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(q->limits.virt_boundary_mask, page);
|
||||
}
|
||||
|
||||
static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
|
||||
{
|
||||
return queue_var_show(queue_dma_alignment(q), page);
|
||||
}
|
||||
|
||||
#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \
|
||||
static ssize_t \
|
||||
queue_##name##_show(struct request_queue *q, char *page) \
|
||||
#define QUEUE_SYSFS_FEATURE(_name, _feature) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
int bit; \
|
||||
bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \
|
||||
return queue_var_show(neg ? !bit : bit, page); \
|
||||
return sprintf(page, "%u\n", \
|
||||
!!(disk->queue->limits.features & _feature)); \
|
||||
} \
|
||||
static ssize_t \
|
||||
queue_##name##_store(struct request_queue *q, const char *page, size_t count) \
|
||||
static ssize_t queue_##_name##_store(struct gendisk *disk, \
|
||||
const char *page, size_t count) \
|
||||
{ \
|
||||
unsigned long val; \
|
||||
ssize_t ret; \
|
||||
ret = queue_var_store(&val, page, count); \
|
||||
if (ret < 0) \
|
||||
return ret; \
|
||||
if (neg) \
|
||||
val = !val; \
|
||||
\
|
||||
if (val) \
|
||||
blk_queue_flag_set(QUEUE_FLAG_##flag, q); \
|
||||
else \
|
||||
blk_queue_flag_clear(QUEUE_FLAG_##flag, q); \
|
||||
return ret; \
|
||||
return queue_feature_store(disk, page, count, _feature); \
|
||||
}
|
||||
|
||||
QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
|
||||
QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
|
||||
QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
|
||||
QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
|
||||
#undef QUEUE_SYSFS_BIT_FNS
|
||||
QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
|
||||
QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
|
||||
QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
|
||||
QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
|
||||
|
||||
static ssize_t queue_zoned_show(struct request_queue *q, char *page)
|
||||
#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \
|
||||
static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
|
||||
{ \
|
||||
return sprintf(page, "%u\n", \
|
||||
!!(disk->queue->limits.features & _feature)); \
|
||||
}
|
||||
|
||||
QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL);
|
||||
QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
|
||||
QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
|
||||
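
For readers following the conversion above, here is a rough sketch of what the new QUEUE_SYSFS_FEATURE() macro expands to for one attribute, derived mechanically from the macro body shown above; it is illustrative only, not a verbatim copy of the generated code.

/* Sketch: approximate expansion of QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL). */
static ssize_t queue_rotational_show(struct gendisk *disk, char *page)
{
	/* report whether the feature bit is set in the queue limits */
	return sprintf(page, "%u\n",
			!!(disk->queue->limits.features & BLK_FEAT_ROTATIONAL));
}
static ssize_t queue_rotational_store(struct gendisk *disk,
		const char *page, size_t count)
{
	/* all feature writes funnel through queue_feature_store() */
	return queue_feature_store(disk, page, count, BLK_FEAT_ROTATIONAL);
}
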
static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
if (blk_queue_is_zoned(q))
if (blk_queue_is_zoned(disk->queue))
return sprintf(page, "host-managed\n");
return sprintf(page, "none\n");
}

static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
{
return queue_var_show(disk_nr_zones(q->disk), page);
return queue_var_show(disk_nr_zones(disk), page);
}

static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
return queue_var_show(bdev_max_open_zones(q->disk->part0), page);
return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
blk_queue_noxmerges(disk->queue), page);
}

static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
{
return queue_var_show(bdev_max_active_zones(q->disk->part0), page);
}

static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
return queue_var_show((blk_queue_nomerges(q) << 1) |
blk_queue_noxmerges(q), page);
}

static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
@@ -337,29 +286,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;

blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, disk->queue);
if (nm == 2)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue);
else if (nm)
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, disk->queue);

return ret;
}

static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);

return queue_var_show(set << force, page);
}

static ssize_t
queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#ifdef CONFIG_SMP
struct request_queue *q = disk->queue;
unsigned long val;

ret = queue_var_store(&val, page, count);
@@ -380,38 +330,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}

static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
{
return sprintf(page, "%d\n", -1);
}

static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
size_t count)
{
return count;
}

static ssize_t queue_poll_show(struct request_queue *q, char *page)
{
return queue_var_show(test_bit(QUEUE_FLAG_POLL, &q->queue_flags), page);
}

static ssize_t queue_poll_store(struct request_queue *q, const char *page,
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
if (!(disk->queue->limits.features & BLK_FEAT_POLL))
return -EINVAL;
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
return count;
}

static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
{
return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
return sprintf(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
}

static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned int val;
@@ -421,46 +361,45 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
if (err || val == 0)
return -EINVAL;

blk_queue_rq_timeout(q, msecs_to_jiffies(val));
blk_queue_rq_timeout(disk->queue, msecs_to_jiffies(val));

return count;
}

static ssize_t queue_wc_show(struct request_queue *q, char *page)
static ssize_t queue_wc_show(struct gendisk *disk, char *page)
{
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
if (blk_queue_write_cache(disk->queue))
return sprintf(page, "write back\n");

return sprintf(page, "write through\n");
}

static ssize_t queue_wc_store(struct request_queue *q, const char *page,
static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
size_t count)
{
struct queue_limits lim;
bool disable;
int err;

if (!strncmp(page, "write back", 10)) {
if (!test_bit(QUEUE_FLAG_HW_WC, &q->queue_flags))
return -EINVAL;
blk_queue_flag_set(QUEUE_FLAG_WC, q);
disable = false;
} else if (!strncmp(page, "write through", 13) ||
!strncmp(page, "none", 4)) {
blk_queue_flag_clear(QUEUE_FLAG_WC, q);
disable = true;
} else {
return -EINVAL;
}

lim = queue_limits_start_update(disk->queue);
if (disable)
lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
else
lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return count;
}

static ssize_t queue_fua_show(struct request_queue *q, char *page)
{
return sprintf(page, "%u\n", test_bit(QUEUE_FLAG_FUA, &q->queue_flags));
}

static ssize_t queue_dax_show(struct request_queue *q, char *page)
{
return queue_var_show(blk_queue_dax(q), page);
}

#define QUEUE_RO_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0444 }, \
@@ -491,12 +430,18 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");

QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
QUEUE_RO_ENTRY(queue_discard_max_hw, "discard_max_hw_bytes");
QUEUE_RW_ENTRY(queue_discard_max, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");

QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
"atomic_write_boundary_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");

QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");

@@ -522,9 +467,9 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
.show = queue_logical_block_size_show,
};

QUEUE_RW_ENTRY(queue_nonrot, "rotational");
QUEUE_RW_ENTRY(queue_rotational, "rotational");
QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RW_ENTRY(queue_random, "add_random");
QUEUE_RW_ENTRY(queue_add_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");

#ifdef CONFIG_BLK_WBT
@@ -541,20 +486,22 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
return 0;
}

static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
if (!wbt_rq_qos(q))
if (!wbt_rq_qos(disk->queue))
return -EINVAL;

if (wbt_disabled(q))
if (wbt_disabled(disk->queue))
return sprintf(page, "0\n");

return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
return sprintf(page, "%llu\n",
div_u64(wbt_get_min_lat(disk->queue), 1000));
}

static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
size_t count)
{
struct request_queue *q = disk->queue;
struct rq_qos *rqos;
ssize_t ret;
s64 val;
@@ -567,7 +514,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,

rqos = wbt_rq_qos(q);
if (!rqos) {
ret = wbt_init(q->disk);
ret = wbt_init(disk);
if (ret)
return ret;
}
@@ -585,13 +532,11 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);

wbt_set_min_lat(q, val);

blk_mq_unquiesce_queue(q);
blk_mq_unfreeze_queue(q);

return count;
}
@@ -615,14 +560,18 @@ static struct attribute *queue_attrs[] = {
&queue_io_min_entry.attr,
&queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_max_hw_entry.attr,
&queue_max_discard_sectors_entry.attr,
&queue_max_hw_discard_sectors_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_atomic_write_max_sectors_entry.attr,
&queue_atomic_write_boundary_sectors_entry.attr,
&queue_atomic_write_unit_min_entry.attr,
&queue_atomic_write_unit_max_entry.attr,
&queue_write_same_max_entry.attr,
&queue_write_zeroes_max_entry.attr,
&queue_max_write_zeroes_sectors_entry.attr,
&queue_zone_append_max_entry.attr,
&queue_zone_write_granularity_entry.attr,
&queue_nonrot_entry.attr,
&queue_rotational_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
&queue_max_open_zones_entry.attr,
@@ -630,7 +579,7 @@ static struct attribute *queue_attrs[] = {
&queue_nomerges_entry.attr,
&queue_iostats_entry.attr,
&queue_stable_writes_entry.attr,
&queue_random_entry.attr,
&queue_add_random_entry.attr,
&queue_poll_entry.attr,
&queue_wc_entry.attr,
&queue_fua_entry.attr,
@@ -699,14 +648,13 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
struct request_queue *q = disk->queue;
ssize_t res;

if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
res = entry->show(q, page);
mutex_unlock(&q->sysfs_lock);
mutex_lock(&disk->queue->sysfs_lock);
res = entry->show(disk, page);
mutex_unlock(&disk->queue->sysfs_lock);
return res;
}

@@ -722,9 +670,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;

blk_mq_freeze_queue(q);
mutex_lock(&q->sysfs_lock);
res = entry->store(q, page, length);
res = entry->store(disk, page, length);
mutex_unlock(&q->sysfs_lock);
blk_mq_unfreeze_queue(q);
return res;
}

@@ -704,6 +704,9 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio

/* Calc approx time to dispatch */
jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;

/* make sure at least one io can be dispatched after waiting */
jiffy_wait = max(jiffy_wait, HZ / iops_limit + 1);
return jiffy_wait;
}
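
A quick arithmetic check of the iops clamp above, using assumed example values (HZ = 1000 and iops_limit = 100 are illustrative, not taken from the patch):

/*
 * Sketch only: with HZ = 1000 and iops_limit = 100,
 *   jiffy_wait = max(jiffy_wait, HZ / iops_limit + 1) = max(jiffy_wait, 11)
 * so the group waits at least ~11ms, long enough for one more I/O to
 * become dispatchable at 100 IOPS (one I/O every 10ms).
 */
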

@@ -37,7 +37,7 @@
enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
WBT_READ = 2, /* read */
WBT_KSWAPD = 4, /* write, from kswapd */
WBT_SWAP = 4, /* write, from swap_writepage() */
WBT_DISCARD = 8, /* discard */

WBT_NR_BITS = 4, /* number of bits */
@@ -45,7 +45,7 @@ enum wbt_flags {

enum {
WBT_RWQ_BG = 0,
WBT_RWQ_KSWAPD,
WBT_RWQ_SWAP,
WBT_RWQ_DISCARD,
WBT_NUM_RWQ,
};
@@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
enum wbt_flags wb_acct)
{
if (wb_acct & WBT_KSWAPD)
return &rwb->rq_wait[WBT_RWQ_KSWAPD];
if (wb_acct & WBT_SWAP)
return &rwb->rq_wait[WBT_RWQ_SWAP];
else if (wb_acct & WBT_DISCARD)
return &rwb->rq_wait[WBT_RWQ_DISCARD];

@@ -206,7 +206,7 @@ static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
*/
if (wb_acct & WBT_DISCARD)
limit = rwb->wb_background;
else if (test_bit(QUEUE_FLAG_WC, &rwb->rqos.disk->queue->queue_flags) &&
else if (blk_queue_write_cache(rwb->rqos.disk->queue) &&
!wb_recent_wait(rwb))
limit = 0;
else
@@ -528,7 +528,7 @@ static bool close_io(struct rq_wb *rwb)
time_before(now, rwb->last_comp + HZ / 10);
}

#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO)
#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)

static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
@@ -539,13 +539,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)

/*
* At this point we know it's a buffered write. If this is
* kswapd trying to free memory, or REQ_SYNC is set, then
* swap trying to free memory, or REQ_SYNC is set, then
* it's WB_SYNC_ALL writeback, and we'll use the max limit for
* that. If the write is marked as a background write, then use
* the idle limit, or go to normal if we haven't had competing
* IO for a bit.
*/
if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
limit = rwb->rq_depth.max_depth;
else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
/*
@@ -622,8 +622,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
if (bio_op(bio) == REQ_OP_READ) {
flags = WBT_READ;
} else if (wbt_should_throttle(bio)) {
if (current_is_kswapd())
flags |= WBT_KSWAPD;
if (bio->bi_opf & REQ_SWAP)
flags |= WBT_SWAP;
if (bio_op(bio) == REQ_OP_DISCARD)
flags |= WBT_DISCARD;
flags |= WBT_TRACKED;

@@ -115,24 +115,6 @@ const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
}
EXPORT_SYMBOL_GPL(blk_zone_cond_str);

/**
* bdev_nr_zones - Get number of zones
* @bdev: Target device
*
* Return the total number of zones of a zoned block device. For a block
* device without zone capabilities, the number of zones is always 0.
*/
unsigned int bdev_nr_zones(struct block_device *bdev)
{
sector_t zone_sectors = bdev_zone_sectors(bdev);

if (!bdev_is_zoned(bdev))
return 0;
return (bdev_nr_sectors(bdev) + zone_sectors - 1) >>
ilog2(zone_sectors);
}
EXPORT_SYMBOL_GPL(bdev_nr_zones);

/**
* blkdev_report_zones - Get zones information
* @bdev: Target block device
@@ -168,77 +150,6 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);

static inline unsigned long *blk_alloc_zone_bitmap(int node,
unsigned int nr_zones)
{
return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
GFP_NOIO, node);
}

static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
/*
* For an all-zones reset, ignore conventional, empty, read-only
* and offline zones.
*/
switch (zone->cond) {
case BLK_ZONE_COND_NOT_WP:
case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_READONLY:
case BLK_ZONE_COND_OFFLINE:
return 0;
default:
set_bit(idx, (unsigned long *)data);
return 0;
}
}

static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
sector_t capacity = bdev_nr_sectors(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
unsigned long *need_reset;
struct bio *bio = NULL;
sector_t sector = 0;
int ret;

need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
if (!need_reset)
return -ENOMEM;

ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
blk_zone_need_reset_cb, need_reset);
if (ret < 0)
goto out_free_need_reset;

ret = 0;
while (sector < capacity) {
if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
sector += zone_sectors;
continue;
}

bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
GFP_KERNEL);
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;

/* This may take a while, so be nice to others */
cond_resched();
}

if (bio) {
ret = submit_bio_wait(bio);
bio_put(bio);
}

out_free_need_reset:
kfree(need_reset);
return ret;
}

static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -265,7 +176,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev)
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sector, sector_t nr_sectors)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
sector_t capacity = bdev_nr_sectors(bdev);
sector_t end_sector = sector + nr_sectors;
@@ -293,16 +203,11 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
return -EINVAL;

/*
* In the case of a zone reset operation over all zones,
* REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
* command. For other devices, we emulate this command behavior by
* identifying the zones needing a reset.
* In the case of a zone reset operation over all zones, use
* REQ_OP_ZONE_RESET_ALL.
*/
if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
if (!blk_queue_zone_resetall(q))
return blkdev_zone_reset_all_emulated(bdev);
if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
return blkdev_zone_reset_all(bdev);
}

while (sector < end_sector) {
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
@@ -1573,7 +1478,7 @@ void disk_free_zone_resources(struct gendisk *disk)
mempool_destroy(disk->zone_wplugs_pool);
disk->zone_wplugs_pool = NULL;

kfree(disk->conv_zones_bitmap);
bitmap_free(disk->conv_zones_bitmap);
disk->conv_zones_bitmap = NULL;
disk->zone_capacity = 0;
disk->last_zone_capacity = 0;
@@ -1650,8 +1555,22 @@ static int disk_update_zone_resources(struct gendisk *disk,
return -ENODEV;
}

lim = queue_limits_start_update(q);

/*
* Some devices can advertize zone resource limits that are larger than
* the number of sequential zones of the zoned block device, e.g. a
* small ZNS namespace. For such case, assume that the zoned device has
* no zone resource limits.
*/
nr_seq_zones = disk->nr_zones - nr_conv_zones;
if (lim.max_open_zones >= nr_seq_zones)
lim.max_open_zones = 0;
if (lim.max_active_zones >= nr_seq_zones)
lim.max_active_zones = 0;

if (!disk->zone_wplugs_pool)
return 0;
goto commit;

/*
* If the device has no limit on the maximum number of open and active
@@ -1660,9 +1579,6 @@ static int disk_update_zone_resources(struct gendisk *disk,
* dynamic zone write plug allocation when simultaneously writing to
* more zones than the size of the mempool.
*/
lim = queue_limits_start_update(q);

nr_seq_zones = disk->nr_zones - nr_conv_zones;
pool_size = max(lim.max_open_zones, lim.max_active_zones);
if (!pool_size)
pool_size = min(BLK_ZONE_WPLUG_DEFAULT_POOL_SIZE, nr_seq_zones);
@@ -1676,6 +1592,7 @@ static int disk_update_zone_resources(struct gendisk *disk,
lim.max_open_zones = 0;
}

commit:
return queue_limits_commit_update(q, &lim);
}

@@ -1683,7 +1600,6 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
struct blk_revalidate_zone_args *args)
{
struct gendisk *disk = args->disk;
struct request_queue *q = disk->queue;

if (zone->capacity != zone->len) {
pr_warn("%s: Invalid conventional zone capacity\n",
@@ -1699,7 +1615,7 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,

if (!args->conv_zones_bitmap) {
args->conv_zones_bitmap =
blk_alloc_zone_bitmap(q->node, args->nr_zones);
bitmap_zalloc(args->nr_zones, GFP_NOIO);
if (!args->conv_zones_bitmap)
return -ENOMEM;
}

block/blk.h | 22
@@ -98,8 +98,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
unsigned long mask = queue_segment_boundary(q);
phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
phys_addr_t addr1 = bvec_phys(vec1);
phys_addr_t addr2 = bvec_phys(vec2);

/*
* Merging adjacent physical pages may not work correctly under KMSAN
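
The bvec_phys() helper used above is not part of this hunk; judging only from the expression it replaces, it amounts to something like the sketch below (an assumption drawn from the replaced lines, not the helper's verbatim definition).

/* Sketch: physical address of a bio_vec, per the expression replaced above. */
static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
{
	return page_to_phys(bvec->bv_page) + bvec->bv_offset;
}
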
@@ -181,9 +181,11 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
return queue_max_segments(rq->q);
}

static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
enum req_op op)
static inline unsigned int blk_queue_get_max_sectors(struct request *rq)
{
struct request_queue *q = rq->q;
enum req_op op = req_op(rq);

if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
return min(q->limits.max_discard_sectors,
UINT_MAX >> SECTOR_SHIFT);
@@ -191,6 +193,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(op == REQ_OP_WRITE_ZEROES))
return q->limits.max_write_zeroes_sectors;

if (rq->cmd_flags & REQ_ATOMIC)
return q->limits.atomic_write_max_sectors;

return q->limits.max_sectors;
}

@@ -352,6 +357,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);

int blk_set_default_limits(struct queue_limits *lim);
void blk_apply_bdi_limits(struct backing_dev_info *bdi,
struct queue_limits *lim);
int blk_dev_init(void);

/*
@@ -393,7 +400,7 @@ struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q);
static inline bool blk_queue_may_bounce(struct request_queue *q)
{
return IS_ENABLED(CONFIG_BOUNCE) &&
q->limits.bounce == BLK_BOUNCE_HIGH &&
(q->limits.features & BLK_FEAT_BOUNCE_HIGH) &&
max_low_pfn >= max_pfn;
}

@@ -673,4 +680,9 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops, struct file *bdev_file);
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);

void blk_integrity_generate(struct bio *bio);
void blk_integrity_verify(struct bio *bio);
void blk_integrity_prepare(struct request *rq);
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);

#endif /* BLK_INTERNAL_H */

@@ -709,24 +709,25 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return ret;
}

ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
int ret;

if (!elv_support_iosched(q))
if (!elv_support_iosched(disk->queue))
return count;

strscpy(elevator_name, buf, sizeof(elevator_name));
ret = elevator_change(q, strstrip(elevator_name));
ret = elevator_change(disk->queue, strstrip(elevator_name));
if (!ret)
return count;
return ret;
}

ssize_t elv_iosched_show(struct request_queue *q, char *name)
ssize_t elv_iosched_show(struct gendisk *disk, char *name)
{
struct request_queue *q = disk->queue;
struct elevator_queue *eq = q->elevator;
struct elevator_type *cur = NULL, *e;
int len = 0;

@@ -147,8 +147,8 @@ extern void elv_unregister(struct elevator_type *);
/*
* io scheduler sysfs switching
*/
extern ssize_t elv_iosched_show(struct request_queue *, char *);
extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);

extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,

block/fops.c | 25
@@ -34,9 +34,12 @@ static blk_opf_t dio_bio_write_op(struct kiocb *iocb)
return opf;
}

static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
struct iov_iter *iter)
static bool blkdev_dio_invalid(struct block_device *bdev, loff_t pos,
struct iov_iter *iter, bool is_atomic)
{
if (is_atomic && !generic_atomic_write_valid(iter, pos))
return true;

return pos & (bdev_logical_block_size(bdev) - 1) ||
!bdev_iter_is_aligned(bdev, iter);
}
@@ -72,6 +75,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio.bi_ioprio = iocb->ki_ioprio;
if (iocb->ki_flags & IOCB_ATOMIC)
bio.bi_opf |= REQ_ATOMIC;

ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
@@ -343,6 +348,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
task_io_account_write(bio->bi_iter.bi_size);
}

if (iocb->ki_flags & IOCB_ATOMIC)
bio->bi_opf |= REQ_ATOMIC;

if (iocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;

@@ -359,12 +367,13 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
bool is_atomic = iocb->ki_flags & IOCB_ATOMIC;
unsigned int nr_pages;

if (!iov_iter_count(iter))
return 0;

if (blkdev_dio_unaligned(bdev, iocb->ki_pos, iter))
if (blkdev_dio_invalid(bdev, iocb->ki_pos, iter, is_atomic))
return -EINVAL;

nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
@@ -373,6 +382,8 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return __blkdev_direct_IO_simple(iocb, iter, bdev,
nr_pages);
return __blkdev_direct_IO_async(iocb, iter, bdev, nr_pages);
} else if (is_atomic) {
return -EINVAL;
}
return __blkdev_direct_IO(iocb, iter, bdev, bio_max_segs(nr_pages));
}
@@ -383,10 +394,11 @@ static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct block_device *bdev = I_BDEV(inode);
loff_t isize = i_size_read(inode);

iomap->bdev = bdev;
iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
if (offset >= isize)
return -EIO;

iomap->bdev = bdev;
iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
iomap->type = IOMAP_MAPPED;
iomap->addr = iomap->offset;
iomap->length = isize - iomap->offset;
@@ -612,6 +624,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (!bdev)
return -ENXIO;

if (bdev_can_atomic_write(bdev) && filp->f_flags & O_DIRECT)
filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;

ret = bdev_open(bdev, mode, filp->private_data, NULL, filp);
if (ret)
blkdev_put_no_open(bdev);

@@ -524,7 +524,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
}

disk_update_readahead(disk);
blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
disk_add_events(disk);
set_bit(GD_ADDED, &disk->state);
return 0;

@@ -224,7 +224,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
goto fail;

err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
BLKDEV_ZERO_NOUNMAP);
BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE);

fail:
filemap_invalidate_unlock(bdev->bd_mapping);

@@ -487,6 +487,20 @@ unlock:
return rq;
}

/*
* 'depth' is a number in the range 1..INT_MAX representing a number of
* requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
* 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
* Values larger than q->nr_requests have the same effect as q->nr_requests.
*/
static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
{
struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
const unsigned int nrr = hctx->queue->nr_requests;

return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
}
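
A worked example of the scaling performed by dd_to_word_depth() above, with assumed values (nr_requests = 64 and sb.shift = 6 are illustrative, not taken from the patch):

/*
 * Sketch only: with nrr = 64 and bt->sb.shift = 6 (64 bits per word),
 * a qdepth of 48 requests maps to
 *   ((48 << 6) + 64 - 1) / 64 = (3072 + 63) / 64 = 48
 * bits of shallow depth per sbitmap word, i.e. 48/64 = 3/4 of each word,
 * the same ratio the old code expressed directly in requests.
 */
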

/*
* Called by __blk_mq_alloc_request(). The shallow_depth value set by this
* function is used by __blk_mq_get_tag().
@@ -503,7 +517,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
* Throttle asynchronous requests and writes such that these requests
* do not block the allocation of synchronous requests.
*/
data->shallow_depth = dd->async_depth;
data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
}

/* Called by blk_mq_update_nr_requests(). */
@@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = q->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;

dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
dd->async_depth = q->nr_requests;

sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
}

/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */

block/t10-pi.c | 296
@@ -11,17 +11,23 @@
#include <linux/module.h>
#include <net/checksum.h>
#include <asm/unaligned.h>
#include "blk.h"

typedef __be16 (csum_fn) (__be16, void *, unsigned int);
struct blk_integrity_iter {
void *prot_buf;
void *data_buf;
sector_t seed;
unsigned int data_size;
unsigned short interval;
const char *disk_name;
};

static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len)
{
return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len));
}

static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
unsigned char csum_type)
{
if (csum_type == BLK_INTEGRITY_CSUM_IP)
return (__force __be16)ip_compute_csum(data, len);
return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len));
}

/*
@@ -29,48 +35,44 @@ static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len)
* 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref
* tag.
*/
static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter,
csum_fn *fn, enum t10_dif_type type)
static void t10_pi_generate(struct blk_integrity_iter *iter,
struct blk_integrity *bi)
{
u8 offset = iter->pi_offset;
u8 offset = bi->pi_offset;
unsigned int i;

for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf + offset;

pi->guard_tag = fn(0, iter->data_buf, iter->interval);
pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval,
bi->csum_type);
if (offset)
pi->guard_tag = fn(pi->guard_tag, iter->prot_buf,
offset);
pi->guard_tag = t10_pi_csum(pi->guard_tag,
iter->prot_buf, offset, bi->csum_type);
pi->app_tag = 0;

if (type == T10_PI_TYPE1_PROTECTION)
if (bi->flags & BLK_INTEGRITY_REF_TAG)
pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed));
else
pi->ref_tag = 0;

iter->data_buf += iter->interval;
iter->prot_buf += iter->tuple_size;
iter->prot_buf += bi->tuple_size;
iter->seed++;
}

return BLK_STS_OK;
}

static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
csum_fn *fn, enum t10_dif_type type)
struct blk_integrity *bi)
{
u8 offset = iter->pi_offset;
u8 offset = bi->pi_offset;
unsigned int i;

BUG_ON(type == T10_PI_TYPE0_PROTECTION);

for (i = 0 ; i < iter->data_size ; i += iter->interval) {
struct t10_pi_tuple *pi = iter->prot_buf + offset;
__be16 csum;

if (type == T10_PI_TYPE1_PROTECTION ||
type == T10_PI_TYPE2_PROTECTION) {
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;

@@ -82,15 +84,17 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,
iter->seed, be32_to_cpu(pi->ref_tag));
return BLK_STS_PROTECTION;
}
} else if (type == T10_PI_TYPE3_PROTECTION) {
} else {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
pi->ref_tag == T10_PI_REF_ESCAPE)
goto next;
}

csum = fn(0, iter->data_buf, iter->interval);
csum = t10_pi_csum(0, iter->data_buf, iter->interval,
bi->csum_type);
if (offset)
csum = fn(csum, iter->prot_buf, offset);
csum = t10_pi_csum(csum, iter->prot_buf, offset,
bi->csum_type);

if (pi->guard_tag != csum) {
pr_err("%s: guard tag error at sector %llu " \
@@ -102,33 +106,13 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter,

next:
iter->data_buf += iter->interval;
iter->prot_buf += iter->tuple_size;
iter->prot_buf += bi->tuple_size;
iter->seed++;
}

return BLK_STS_OK;
}

static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
}

static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
}

static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION);
}

static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
}

/**
* t10_pi_type1_prepare - prepare PI prior submitting request to device
* @rq: request with PI that should be prepared
@@ -141,7 +125,7 @@ static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
*/
static void t10_pi_type1_prepare(struct request *rq)
{
struct blk_integrity *bi = &rq->q->integrity;
struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
@@ -192,7 +176,7 @@ static void t10_pi_type1_prepare(struct request *rq)
*/
static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
struct blk_integrity *bi = &rq->q->integrity;
struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
u32 ref_tag = t10_pi_ref_tag(rq);
@@ -225,81 +209,15 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
}
}

static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
}

static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
{
return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}

static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
}

static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
{
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}

/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_prepare(struct request *rq)
{
}

/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
{
}

const struct blk_integrity_profile t10_pi_type1_crc = {
.name = "T10-DIF-TYPE1-CRC",
.generate_fn = t10_pi_type1_generate_crc,
.verify_fn = t10_pi_type1_verify_crc,
.prepare_fn = t10_pi_type1_prepare,
.complete_fn = t10_pi_type1_complete,
};
EXPORT_SYMBOL(t10_pi_type1_crc);

const struct blk_integrity_profile t10_pi_type1_ip = {
.name = "T10-DIF-TYPE1-IP",
.generate_fn = t10_pi_type1_generate_ip,
.verify_fn = t10_pi_type1_verify_ip,
.prepare_fn = t10_pi_type1_prepare,
.complete_fn = t10_pi_type1_complete,
};
EXPORT_SYMBOL(t10_pi_type1_ip);

const struct blk_integrity_profile t10_pi_type3_crc = {
.name = "T10-DIF-TYPE3-CRC",
.generate_fn = t10_pi_type3_generate_crc,
.verify_fn = t10_pi_type3_verify_crc,
.prepare_fn = t10_pi_type3_prepare,
.complete_fn = t10_pi_type3_complete,
};
EXPORT_SYMBOL(t10_pi_type3_crc);

const struct blk_integrity_profile t10_pi_type3_ip = {
.name = "T10-DIF-TYPE3-IP",
.generate_fn = t10_pi_type3_generate_ip,
.verify_fn = t10_pi_type3_verify_ip,
.prepare_fn = t10_pi_type3_prepare,
.complete_fn = t10_pi_type3_complete,
};
EXPORT_SYMBOL(t10_pi_type3_ip);

static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
}

static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
enum t10_dif_type type)
static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
struct blk_integrity *bi)
{
u8 offset = iter->pi_offset;
u8 offset = bi->pi_offset;
unsigned int i;

for (i = 0 ; i < iter->data_size ; i += iter->interval) {
@@ -311,17 +229,15 @@ static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter,
iter->prot_buf, offset);
pi->app_tag = 0;

if (type == T10_PI_TYPE1_PROTECTION)
if (bi->flags & BLK_INTEGRITY_REF_TAG)
put_unaligned_be48(iter->seed, pi->ref_tag);
else
put_unaligned_be48(0ULL, pi->ref_tag);

iter->data_buf += iter->interval;
iter->prot_buf += iter->tuple_size;
iter->prot_buf += bi->tuple_size;
iter->seed++;
}

return BLK_STS_OK;
}

static bool ext_pi_ref_escape(u8 *ref_tag)
@@ -332,9 +248,9 @@ static bool ext_pi_ref_escape(u8 *ref_tag)
}

static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
enum t10_dif_type type)
struct blk_integrity *bi)
{
u8 offset = iter->pi_offset;
u8 offset = bi->pi_offset;
unsigned int i;

for (i = 0; i < iter->data_size; i += iter->interval) {
@@ -342,7 +258,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
u64 ref, seed;
__be64 csum;

if (type == T10_PI_TYPE1_PROTECTION) {
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
if (pi->app_tag == T10_PI_APP_ESCAPE)
goto next;

@@ -353,7 +269,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,
iter->disk_name, seed, ref);
return BLK_STS_PROTECTION;
}
} else if (type == T10_PI_TYPE3_PROTECTION) {
} else {
if (pi->app_tag == T10_PI_APP_ESCAPE &&
ext_pi_ref_escape(pi->ref_tag))
goto next;
@@ -374,26 +290,16 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter,

next:
iter->data_buf += iter->interval;
iter->prot_buf += iter->tuple_size;
iter->prot_buf += bi->tuple_size;
iter->seed++;
}

return BLK_STS_OK;
}

static blk_status_t ext_pi_type1_verify_crc64(struct blk_integrity_iter *iter)
{
return ext_pi_crc64_verify(iter, T10_PI_TYPE1_PROTECTION);
}

static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter)
{
return ext_pi_crc64_generate(iter, T10_PI_TYPE1_PROTECTION);
}

static void ext_pi_type1_prepare(struct request *rq)
{
struct blk_integrity *bi = &rq->q->integrity;
struct blk_integrity *bi = &rq->q->limits.integrity;
const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
u8 offset = bi->pi_offset;
@@ -433,7 +339,7 @@ static void ext_pi_type1_prepare(struct request *rq)

static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
{
struct blk_integrity *bi = &rq->q->integrity;
struct blk_integrity *bi = &rq->q->limits.integrity;
unsigned intervals = nr_bytes >> bi->interval_exp;
const int tuple_sz = bi->tuple_size;
u64 ref_tag = ext_pi_ref_tag(rq);
@@ -467,33 +373,105 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
}
}

static blk_status_t ext_pi_type3_verify_crc64(struct blk_integrity_iter *iter)
void blk_integrity_generate(struct bio *bio)
{
return ext_pi_crc64_verify(iter, T10_PI_TYPE3_PROTECTION);
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity_iter iter;
struct bvec_iter bviter;
struct bio_vec bv;

iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = bio->bi_iter.bi_sector;
iter.prot_buf = bvec_virt(bip->bip_vec);
bio_for_each_segment(bv, bio, bviter) {
void *kaddr = bvec_kmap_local(&bv);

iter.data_buf = kaddr;
iter.data_size = bv.bv_len;
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
ext_pi_crc64_generate(&iter, bi);
break;
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
t10_pi_generate(&iter, bi);
break;
default:
break;
}
kunmap_local(kaddr);
}
}

static blk_status_t ext_pi_type3_generate_crc64(struct blk_integrity_iter *iter)
void blk_integrity_verify(struct bio *bio)
{
return ext_pi_crc64_generate(iter, T10_PI_TYPE3_PROTECTION);
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
struct bio_integrity_payload *bip = bio_integrity(bio);
struct blk_integrity_iter iter;
struct bvec_iter bviter;
struct bio_vec bv;

/*
* At the moment verify is called bi_iter has been advanced during split
* and completion, so use the copy created during submission here.
*/
iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
iter.interval = 1 << bi->interval_exp;
iter.seed = bip->bio_iter.bi_sector;
iter.prot_buf = bvec_virt(bip->bip_vec);
__bio_for_each_segment(bv, bio, bviter, bip->bio_iter) {
void *kaddr = bvec_kmap_local(&bv);
blk_status_t ret = BLK_STS_OK;

iter.data_buf = kaddr;
iter.data_size = bv.bv_len;
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
ret = ext_pi_crc64_verify(&iter, bi);
break;
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
ret = t10_pi_verify(&iter, bi);
break;
default:
break;
}
kunmap_local(kaddr);

if (ret) {
bio->bi_status = ret;
return;
}
}
}

const struct blk_integrity_profile ext_pi_type1_crc64 = {
.name = "EXT-DIF-TYPE1-CRC64",
.generate_fn = ext_pi_type1_generate_crc64,
.verify_fn = ext_pi_type1_verify_crc64,
.prepare_fn = ext_pi_type1_prepare,
.complete_fn = ext_pi_type1_complete,
};
EXPORT_SYMBOL_GPL(ext_pi_type1_crc64);
void blk_integrity_prepare(struct request *rq)
{
struct blk_integrity *bi = &rq->q->limits.integrity;

const struct blk_integrity_profile ext_pi_type3_crc64 = {
.name = "EXT-DIF-TYPE3-CRC64",
.generate_fn = ext_pi_type3_generate_crc64,
.verify_fn = ext_pi_type3_verify_crc64,
.prepare_fn = t10_pi_type3_prepare,
.complete_fn = t10_pi_type3_complete,
};
EXPORT_SYMBOL_GPL(ext_pi_type3_crc64);
if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
return;

if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
ext_pi_type1_prepare(rq);
else
t10_pi_type1_prepare(rq);
}

void blk_integrity_complete(struct request *rq, unsigned int nr_bytes)
{
struct blk_integrity *bi = &rq->q->limits.integrity;

if (!(bi->flags & BLK_INTEGRITY_REF_TAG))
return;

if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64)
ext_pi_type1_complete(rq, nr_bytes);
else
t10_pi_type1_complete(rq, nr_bytes);
}

MODULE_DESCRIPTION("T10 Protection Information module");
MODULE_LICENSE("GPL");

@@ -1024,7 +1024,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev)
{
struct request_queue *q = sdev->request_queue;
int depth = 1;

if (!ata_id_has_unload(dev->id))
@@ -1038,7 +1037,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
sdev->sector_size = ATA_SECT_SIZE;

/* set DMA padding */
blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
lim->dma_pad_mask = ATA_DMA_PAD_SZ - 1;

/* make room for appending the drain */
lim->max_segments--;

@@ -816,7 +816,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
/* OHare has issues with non cache aligned DMA on some chipsets */
if (priv->kind == controller_ohare) {
lim->dma_alignment = 31;
blk_queue_update_dma_pad(sdev->request_queue, 31);
lim->dma_pad_mask = 31;

/* Tell the world about it */
ata_dev_info(dev, "OHare alignment limits applied\n");
@@ -831,7 +831,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) {
/* Allright these are bad, apply restrictions */
lim->dma_alignment = 15;
blk_queue_update_dma_pad(sdev->request_queue, 15);
lim->dma_pad_mask = 15;

/* We enable MWI and hack cache line size directly here, this
* is specific to this chipset and not normal values, we happen

@@ -354,6 +354,15 @@ config VIRTIO_BLK
This is the virtual block driver for virtio. It can be used with
QEMU based VMMs (like KVM or Xen). Say Y or M.

config BLK_DEV_RUST_NULL
tristate "Rust null block driver (Experimental)"
depends on RUST
help
This is the Rust implementation of the null block driver. For now it
is only a minimal stub.

If unsure, say N.

config BLK_DEV_RBD
tristate "Rados block device (RBD)"
depends on INET && BLOCK

@@ -9,6 +9,9 @@
# needed for trace events
ccflags-y += -I$(src)

obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
rnull_mod-y := rnull.o

obj-$(CONFIG_MAC_FLOPPY) += swim3.o
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
obj-$(CONFIG_BLK_DEV_FD) += floppy.o

@@ -232,6 +232,7 @@ static DEFINE_MUTEX(amiflop_mutex);
static unsigned long int fd_def_df0 = FD_DD_3; /* default for df0 if it doesn't identify */

module_param(fd_def_df0, ulong, 0);
MODULE_DESCRIPTION("Amiga floppy driver");
MODULE_LICENSE("GPL");

/*
@@ -1776,10 +1777,13 @@ static const struct blk_mq_ops amiflop_mq_ops = {

static int fd_alloc_disk(int drive, int system)
{
struct queue_limits lim = {
.features = BLK_FEAT_ROTATIONAL,
};
struct gendisk *disk;
int err;

disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);

@@ -337,6 +337,7 @@ aoeblk_gdalloc(void *vp)
struct queue_limits lim = {
.max_hw_sectors = aoe_maxsectors,
.io_opt = SZ_2M,
.features = BLK_FEAT_ROTATIONAL,
};
ulong flags;
int late = 0;

@@ -1992,9 +1992,12 @@ static const struct blk_mq_ops ataflop_mq_ops = {

static int ataflop_alloc_disk(unsigned int drive, unsigned int type)
{
struct queue_limits lim = {
.features = BLK_FEAT_ROTATIONAL,
};
struct gendisk *disk;

disk = blk_mq_alloc_disk(&unit[drive].tag_set, NULL, NULL);
disk = blk_mq_alloc_disk(&unit[drive].tag_set, &lim, NULL);
if (IS_ERR(disk))
return PTR_ERR(disk);

@@ -2197,4 +2200,5 @@ static void __exit atari_floppy_exit(void)
module_init(atari_floppy_init)
module_exit(atari_floppy_exit)

MODULE_DESCRIPTION("Atari floppy driver");
MODULE_LICENSE("GPL");

@@ -296,6 +296,7 @@ static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");

MODULE_DESCRIPTION("Ram backed block device driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");
@@ -335,6 +336,8 @@ static int brd_alloc(int i)
.max_hw_discard_sectors = UINT_MAX,
.max_discard_segments = 1,
.discard_granularity = PAGE_SIZE,
.features = BLK_FEAT_SYNCHRONOUS |
BLK_FEAT_NOWAIT,
};

list_for_each_entry(brd, &brd_devices, brd_list)
@@ -366,10 +369,6 @@ static int brd_alloc(int i)
strscpy(disk->disk_name, buf, DISK_NAME_LEN);
set_capacity(disk, rd_size * 2);

/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, disk->queue);
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;

@@ -2697,6 +2697,9 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
* connect.
*/
.max_hw_sectors = DRBD_MAX_BIO_SIZE_SAFE >> 8,
.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
BLK_FEAT_ROTATIONAL |
BLK_FEAT_STABLE_WRITES,
|
||||
};
|
||||
|
||||
device = minor_to_device(minor);
|
||||
@ -2735,9 +2738,6 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
|
||||
sprintf(disk->disk_name, "drbd%d", minor);
|
||||
disk->private_data = device;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
|
||||
blk_queue_write_cache(disk->queue, true, true);
|
||||
|
||||
device->md_io.page = alloc_page(GFP_KERNEL);
|
||||
if (!device->md_io.page)
|
||||
goto out_no_io_page;
|
||||
|
@ -4517,6 +4517,7 @@ static int floppy_alloc_disk(unsigned int drive, unsigned int type)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = 64,
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
|
||||
@ -5016,6 +5017,7 @@ module_param(floppy, charp, 0);
|
||||
module_param(FLOPPY_IRQ, int, 0);
|
||||
module_param(FLOPPY_DMA, int, 0);
|
||||
MODULE_AUTHOR("Alain L. Knaff");
|
||||
MODULE_DESCRIPTION("Normal floppy disk support");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
/* This doesn't actually get used other than for module information */
|
||||
|
@ -211,13 +211,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
|
||||
if (lo->lo_state == Lo_bound)
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
lo->use_dio = use_dio;
|
||||
if (use_dio) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
if (use_dio)
|
||||
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
|
||||
} else {
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
else
|
||||
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
|
||||
}
|
||||
if (lo->lo_state == Lo_bound)
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
}
|
||||
@ -939,24 +936,6 @@ static void loop_free_idle_workers_timer(struct timer_list *timer)
|
||||
return loop_free_idle_workers(lo, false);
|
||||
}
|
||||
|
||||
static void loop_update_rotational(struct loop_device *lo)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
struct inode *file_inode = file->f_mapping->host;
|
||||
struct block_device *file_bdev = file_inode->i_sb->s_bdev;
|
||||
struct request_queue *q = lo->lo_queue;
|
||||
bool nonrot = true;
|
||||
|
||||
/* not all filesystems (e.g. tmpfs) have a sb->s_bdev */
|
||||
if (file_bdev)
|
||||
nonrot = bdev_nonrot(file_bdev);
|
||||
|
||||
if (nonrot)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
}
|
||||
|
||||
/**
|
||||
* loop_set_status_from_info - configure device from loop_info
|
||||
* @lo: struct loop_device to configure
|
||||
@ -998,16 +977,39 @@ loop_set_status_from_info(struct loop_device *lo,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize,
|
||||
bool update_discard_settings)
|
||||
static unsigned short loop_default_blocksize(struct loop_device *lo,
|
||||
struct block_device *backing_bdev)
|
||||
{
|
||||
/* In case of direct I/O, match underlying block size */
|
||||
if ((lo->lo_backing_file->f_flags & O_DIRECT) && backing_bdev)
|
||||
return bdev_logical_block_size(backing_bdev);
|
||||
return SECTOR_SIZE;
|
||||
}
|
||||
|
||||
static int loop_reconfigure_limits(struct loop_device *lo, unsigned short bsize)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct block_device *backing_bdev = NULL;
|
||||
struct queue_limits lim;
|
||||
|
||||
if (S_ISBLK(inode->i_mode))
|
||||
backing_bdev = I_BDEV(inode);
|
||||
else if (inode->i_sb->s_bdev)
|
||||
backing_bdev = inode->i_sb->s_bdev;
|
||||
|
||||
if (!bsize)
|
||||
bsize = loop_default_blocksize(lo, backing_bdev);
|
||||
|
||||
lim = queue_limits_start_update(lo->lo_queue);
|
||||
lim.logical_block_size = bsize;
|
||||
lim.physical_block_size = bsize;
|
||||
lim.io_min = bsize;
|
||||
if (update_discard_settings)
|
||||
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL);
|
||||
if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY))
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
if (backing_bdev && !bdev_nonrot(backing_bdev))
|
||||
lim.features |= BLK_FEAT_ROTATIONAL;
|
||||
loop_config_discard(lo, &lim);
|
||||
return queue_limits_commit_update(lo->lo_queue, &lim);
|
||||
}
|
||||
@ -1017,12 +1019,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
const struct loop_config *config)
|
||||
{
|
||||
struct file *file = fget(config->fd);
|
||||
struct inode *inode;
|
||||
struct address_space *mapping;
|
||||
int error;
|
||||
loff_t size;
|
||||
bool partscan;
|
||||
unsigned short bsize;
|
||||
bool is_loop;
|
||||
|
||||
if (!file)
|
||||
@ -1055,19 +1055,12 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
goto out_unlock;
|
||||
|
||||
mapping = file->f_mapping;
|
||||
inode = mapping->host;
|
||||
|
||||
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
|
||||
error = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (config->block_size) {
|
||||
error = blk_validate_block_size(config->block_size);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
error = loop_set_status_from_info(lo, &config->info);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
@ -1098,22 +1091,10 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
|
||||
lo->old_gfp_mask = mapping_gfp_mask(mapping);
|
||||
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
|
||||
|
||||
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
|
||||
blk_queue_write_cache(lo->lo_queue, true, false);
|
||||
|
||||
if (config->block_size)
|
||||
bsize = config->block_size;
|
||||
else if ((lo->lo_backing_file->f_flags & O_DIRECT) && inode->i_sb->s_bdev)
|
||||
/* In case of direct I/O, match underlying block size */
|
||||
bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
|
||||
else
|
||||
bsize = 512;
|
||||
|
||||
error = loop_reconfigure_limits(lo, bsize, true);
|
||||
if (WARN_ON_ONCE(error))
|
||||
error = loop_reconfigure_limits(lo, config->block_size);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
loop_update_rotational(lo);
|
||||
loop_update_dio(lo);
|
||||
loop_sysfs_init(lo);
|
||||
|
||||
@ -1154,22 +1135,12 @@ out_putf:
|
||||
return error;
|
||||
}
|
||||
|
||||
static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
static void __loop_clr_fd(struct loop_device *lo)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
struct file *filp;
|
||||
gfp_t gfp = lo->old_gfp_mask;
|
||||
|
||||
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
|
||||
blk_queue_write_cache(lo->lo_queue, false, false);
|
||||
|
||||
/*
|
||||
* Freeze the request queue when unbinding on a live file descriptor and
|
||||
* thus an open device. When called from ->release we are guaranteed
|
||||
* that there is no I/O in progress already.
|
||||
*/
|
||||
if (!release)
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
filp = lo->lo_backing_file;
|
||||
lo->lo_backing_file = NULL;
|
||||
@ -1179,7 +1150,14 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
lo->lo_offset = 0;
|
||||
lo->lo_sizelimit = 0;
|
||||
memset(lo->lo_file_name, 0, LO_NAME_SIZE);
|
||||
loop_reconfigure_limits(lo, 512, false);
|
||||
|
||||
/* reset the block size to the default */
|
||||
lim = queue_limits_start_update(lo->lo_queue);
|
||||
lim.logical_block_size = SECTOR_SIZE;
|
||||
lim.physical_block_size = SECTOR_SIZE;
|
||||
lim.io_min = SECTOR_SIZE;
|
||||
queue_limits_commit_update(lo->lo_queue, &lim);
|
||||
|
||||
invalidate_disk(lo->lo_disk);
|
||||
loop_sysfs_exit(lo);
|
||||
/* let user-space know about this change */
|
||||
@ -1187,8 +1165,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
mapping_set_gfp_mask(filp->f_mapping, gfp);
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
if (!release)
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
disk_force_media_change(lo->lo_disk);
|
||||
|
||||
@ -1203,11 +1179,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
* must be at least one and it can only become zero when the
|
||||
* current holder is released.
|
||||
*/
|
||||
if (!release)
|
||||
mutex_lock(&lo->lo_disk->open_mutex);
|
||||
err = bdev_disk_changed(lo->lo_disk, false);
|
||||
if (!release)
|
||||
mutex_unlock(&lo->lo_disk->open_mutex);
|
||||
if (err)
|
||||
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
|
||||
__func__, lo->lo_number, err);
|
||||
@ -1256,24 +1228,16 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||
return -ENXIO;
|
||||
}
|
||||
/*
|
||||
* If we've explicitly asked to tear down the loop device,
|
||||
* and it has an elevated reference count, set it for auto-teardown when
|
||||
* the last reference goes away. This stops $!~#$@ udev from
|
||||
* preventing teardown because it decided that it needs to run blkid on
|
||||
* the loopback device whenever they appear. xfstests is notorious for
|
||||
* failing tests because blkid via udev races with a losetup
|
||||
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
|
||||
* command to fail with EBUSY.
|
||||
* Mark the device for removing the backing device on last close.
|
||||
* If we are the only opener, also switch the state to roundown here to
|
||||
* prevent new openers from coming in.
|
||||
*/
|
||||
if (disk_openers(lo->lo_disk) > 1) {
|
||||
|
||||
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
|
||||
loop_global_unlock(lo, true);
|
||||
return 0;
|
||||
}
|
||||
if (disk_openers(lo->lo_disk) == 1)
|
||||
lo->lo_state = Lo_rundown;
|
||||
loop_global_unlock(lo, true);
|
||||
|
||||
__loop_clr_fd(lo, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1500,10 +1464,6 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
if (lo->lo_state != Lo_bound)
|
||||
return -ENXIO;
|
||||
|
||||
err = blk_validate_block_size(arg);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (lo->lo_queue->limits.logical_block_size == arg)
|
||||
return 0;
|
||||
|
||||
@ -1511,7 +1471,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
invalidate_bdev(lo->lo_device);
|
||||
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
err = loop_reconfigure_limits(lo, arg, false);
|
||||
err = loop_reconfigure_limits(lo, arg);
|
||||
loop_update_dio(lo);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
@ -1740,25 +1700,43 @@ static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
}
|
||||
#endif
|
||||
|
||||
static int lo_open(struct gendisk *disk, blk_mode_t mode)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
int err;
|
||||
|
||||
err = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
|
||||
err = -ENXIO;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void lo_release(struct gendisk *disk)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
bool need_clear = false;
|
||||
|
||||
if (disk_openers(disk) > 0)
|
||||
return;
|
||||
/*
|
||||
* Clear the backing device information if this is the last close of
|
||||
* a device that's been marked for auto clear, or on which LOOP_CLR_FD
|
||||
* has been called.
|
||||
*/
|
||||
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
|
||||
if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR))
|
||||
lo->lo_state = Lo_rundown;
|
||||
|
||||
need_clear = (lo->lo_state == Lo_rundown);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
/*
|
||||
* In autoclear mode, stop the loop thread
|
||||
* and remove configuration after last close.
|
||||
*/
|
||||
__loop_clr_fd(lo, true);
|
||||
return;
|
||||
}
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
if (need_clear)
|
||||
__loop_clr_fd(lo);
|
||||
}
|
||||
|
||||
static void lo_free_disk(struct gendisk *disk)
|
||||
@ -1775,6 +1753,7 @@ static void lo_free_disk(struct gendisk *disk)
|
||||
|
||||
static const struct block_device_operations lo_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = lo_open,
|
||||
.release = lo_release,
|
||||
.ioctl = lo_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
@ -1853,6 +1832,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
|
||||
device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
|
||||
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH));
|
||||
|
||||
MODULE_DESCRIPTION("Loopback device support");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
|
||||
|
||||
@ -2059,14 +2039,6 @@ static int loop_add(int i)
|
||||
}
|
||||
lo->lo_queue = lo->lo_disk->queue;
|
||||
|
||||
/*
|
||||
* By default, we do buffer IO, so it doesn't make sense to enable
|
||||
* merge because the I/O submitted to backing file is handled page by
|
||||
* page. For directio mode, merge does help to dispatch bigger request
|
||||
* to underlayer disk. We will enable merge once directio is enabled.
|
||||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
|
||||
/*
|
||||
* Disable partition scanning by default. The in-kernel partition
|
||||
* scanning can be requested individually per-device during its
|
||||
|
@ -3485,8 +3485,6 @@ skip_create_disk:
|
||||
goto start_service_thread;
|
||||
|
||||
/* Set device limits. */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, dd->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, dd->queue);
|
||||
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
|
||||
|
||||
/* Set the capacity of the device in 512 byte sectors. */
|
||||
|
@ -150,8 +150,6 @@ static int __init n64cart_probe(struct platform_device *pdev)
|
||||
set_capacity(disk, size >> SECTOR_SHIFT);
|
||||
set_disk_ro(disk, 1);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
|
||||
err = add_disk(disk);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
@ -342,6 +342,14 @@ static int __nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
lim.max_hw_discard_sectors = UINT_MAX;
|
||||
else
|
||||
lim.max_hw_discard_sectors = 0;
|
||||
if (!(nbd->config->flags & NBD_FLAG_SEND_FLUSH)) {
|
||||
lim.features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
|
||||
} else if (nbd->config->flags & NBD_FLAG_SEND_FUA) {
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
|
||||
} else {
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
lim.features &= ~BLK_FEAT_FUA;
|
||||
}
|
||||
lim.logical_block_size = blksize;
|
||||
lim.physical_block_size = blksize;
|
||||
error = queue_limits_commit_update(nbd->disk->queue, &lim);
|
||||
@ -1279,19 +1287,10 @@ static void nbd_bdev_reset(struct nbd_device *nbd)
|
||||
|
||||
static void nbd_parse_flags(struct nbd_device *nbd)
|
||||
{
|
||||
struct nbd_config *config = nbd->config;
|
||||
if (config->flags & NBD_FLAG_READ_ONLY)
|
||||
if (nbd->config->flags & NBD_FLAG_READ_ONLY)
|
||||
set_disk_ro(nbd->disk, true);
|
||||
else
|
||||
set_disk_ro(nbd->disk, false);
|
||||
if (config->flags & NBD_FLAG_SEND_FLUSH) {
|
||||
if (config->flags & NBD_FLAG_SEND_FUA)
|
||||
blk_queue_write_cache(nbd->disk->queue, true, true);
|
||||
else
|
||||
blk_queue_write_cache(nbd->disk->queue, true, false);
|
||||
}
|
||||
else
|
||||
blk_queue_write_cache(nbd->disk->queue, false, false);
|
||||
}
|
||||
|
||||
static void send_disconnects(struct nbd_device *nbd)
|
||||
@ -1801,7 +1800,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = 65536,
|
||||
.max_user_sectors = 256,
|
||||
.io_opt = 256 << SECTOR_SHIFT,
|
||||
.max_segments = USHRT_MAX,
|
||||
.max_segment_size = UINT_MAX,
|
||||
};
|
||||
@ -1861,11 +1860,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
goto out_err_disk;
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell the block layer that we are not a rotational device
|
||||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
|
||||
mutex_init(&nbd->config_lock);
|
||||
refcount_set(&nbd->config_refs, 0);
|
||||
/*
|
||||
|
@ -77,7 +77,7 @@ enum {
|
||||
NULL_IRQ_TIMER = 2,
|
||||
};
|
||||
|
||||
static bool g_virt_boundary = false;
|
||||
static bool g_virt_boundary;
|
||||
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
|
||||
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
|
||||
|
||||
@ -227,7 +227,7 @@ MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (
|
||||
|
||||
static bool g_fua = true;
|
||||
module_param_named(fua, g_fua, bool, 0444);
|
||||
MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
|
||||
MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
|
||||
|
||||
static unsigned int g_mbps;
|
||||
module_param_named(mbps, g_mbps, uint, 0444);
|
||||
@ -262,6 +262,10 @@ module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444
|
||||
MODULE_PARM_DESC(zone_append_max_sectors,
|
||||
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
|
||||
|
||||
static bool g_zone_full;
|
||||
module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
|
||||
MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
|
||||
|
||||
static struct nullb_device *null_alloc_dev(void);
|
||||
static void null_free_dev(struct nullb_device *dev);
|
||||
static void null_del_dev(struct nullb *nullb);
|
||||
@ -458,6 +462,7 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
|
||||
NULLB_DEVICE_ATTR(zone_full, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
|
||||
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
|
||||
@ -610,6 +615,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
|
||||
&nullb_device_attr_zone_append_max_sectors,
|
||||
&nullb_device_attr_zone_readonly,
|
||||
&nullb_device_attr_zone_offline,
|
||||
&nullb_device_attr_zone_full,
|
||||
&nullb_device_attr_virt_boundary,
|
||||
&nullb_device_attr_no_sched,
|
||||
&nullb_device_attr_shared_tags,
|
||||
@ -700,7 +706,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
|
||||
"shared_tags,size,submit_queues,use_per_node_hctx,"
|
||||
"virt_boundary,zoned,zone_capacity,zone_max_active,"
|
||||
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
|
||||
"zone_size,zone_append_max_sectors\n");
|
||||
"zone_size,zone_append_max_sectors,zone_full\n");
|
||||
}
|
||||
|
||||
CONFIGFS_ATTR_RO(memb_group_, features);
|
||||
@ -781,6 +787,7 @@ static struct nullb_device *null_alloc_dev(void)
|
||||
dev->zone_max_open = g_zone_max_open;
|
||||
dev->zone_max_active = g_zone_max_active;
|
||||
dev->zone_append_max_sectors = g_zone_append_max_sectors;
|
||||
dev->zone_full = g_zone_full;
|
||||
dev->virt_boundary = g_virt_boundary;
|
||||
dev->no_sched = g_no_sched;
|
||||
dev->shared_tags = g_shared_tags;
|
||||
@ -1824,9 +1831,6 @@ static int null_validate_conf(struct nullb_device *dev)
|
||||
dev->queue_mode = NULL_Q_MQ;
|
||||
}
|
||||
|
||||
if (blk_validate_block_size(dev->blocksize))
|
||||
return -EINVAL;
|
||||
|
||||
if (dev->use_per_node_hctx) {
|
||||
if (dev->submit_queues != nr_online_nodes)
|
||||
dev->submit_queues = nr_online_nodes;
|
||||
@ -1928,6 +1932,13 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
goto out_cleanup_tags;
|
||||
}
|
||||
|
||||
if (dev->cache_size > 0) {
|
||||
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
if (dev->fua)
|
||||
lim.features |= BLK_FEAT_FUA;
|
||||
}
|
||||
|
||||
nullb->disk = blk_mq_alloc_disk(nullb->tag_set, &lim, nullb);
|
||||
if (IS_ERR(nullb->disk)) {
|
||||
rv = PTR_ERR(nullb->disk);
|
||||
@ -1940,13 +1951,7 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
nullb_setup_bwtimer(nullb);
|
||||
}
|
||||
|
||||
if (dev->cache_size > 0) {
|
||||
set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
|
||||
blk_queue_write_cache(nullb->q, true, dev->fua);
|
||||
}
|
||||
|
||||
nullb->q->queuedata = nullb;
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
|
||||
|
||||
rv = ida_alloc(&nullb_indexes, GFP_KERNEL);
|
||||
if (rv < 0)
|
||||
|
@ -101,6 +101,7 @@ struct nullb_device {
|
||||
bool memory_backed; /* if data is stored in memory */
|
||||
bool discard; /* if support discard */
|
||||
bool zoned; /* if device is zoned */
|
||||
bool zone_full; /* Initialize zones to be full */
|
||||
bool virt_boundary; /* virtual boundary on/off for the device */
|
||||
bool no_sched; /* no IO scheduler for the device */
|
||||
bool shared_tags; /* share tag set between devices for blk-mq */
|
||||
|
@ -145,7 +145,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
zone = &dev->zones[i];
|
||||
|
||||
null_init_zone_lock(dev, zone);
|
||||
zone->start = zone->wp = sector;
|
||||
zone->start = sector;
|
||||
if (zone->start + dev->zone_size_sects > dev_capacity_sects)
|
||||
zone->len = dev_capacity_sects - zone->start;
|
||||
else
|
||||
@ -153,12 +153,18 @@ int null_init_zoned_dev(struct nullb_device *dev,
|
||||
zone->capacity =
|
||||
min_t(sector_t, zone->len, zone_capacity_sects);
|
||||
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
|
||||
if (dev->zone_full) {
|
||||
zone->cond = BLK_ZONE_COND_FULL;
|
||||
zone->wp = zone->start + zone->capacity;
|
||||
} else{
|
||||
zone->cond = BLK_ZONE_COND_EMPTY;
|
||||
zone->wp = zone->start;
|
||||
}
|
||||
|
||||
sector += dev->zone_size_sects;
|
||||
}
|
||||
|
||||
lim->zoned = true;
|
||||
lim->features |= BLK_FEAT_ZONED;
|
||||
lim->chunk_sectors = dev->zone_size_sects;
|
||||
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
|
||||
lim->max_open_zones = dev->zone_max_open;
|
||||
@ -171,9 +177,6 @@ int null_register_zoned_dev(struct nullb *nullb)
|
||||
struct request_queue *q = nullb->q;
|
||||
struct gendisk *disk = nullb->disk;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
|
||||
disk->nr_zones = bdev_nr_zones(disk->part0);
|
||||
|
||||
pr_info("%s: using %s zone append\n",
|
||||
disk->disk_name,
|
||||
queue_emulates_zone_append(q) ? "emulated" : "native");
|
||||
|
@ -2622,6 +2622,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = PACKET_MAX_SECTORS,
|
||||
.logical_block_size = CD_FRAMESIZE,
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
int idx;
|
||||
int ret = -ENOMEM;
|
||||
|
@ -388,9 +388,9 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
.max_segments = -1,
|
||||
.max_segment_size = dev->bounce_size,
|
||||
.dma_alignment = dev->blk_size - 1,
|
||||
.features = BLK_FEAT_WRITE_CACHE |
|
||||
BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
|
||||
struct request_queue *queue;
|
||||
struct gendisk *gendisk;
|
||||
|
||||
if (dev->blk_size < 512) {
|
||||
@ -447,10 +447,6 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
|
||||
goto fail_free_tag_set;
|
||||
}
|
||||
|
||||
queue = gendisk->queue;
|
||||
|
||||
blk_queue_write_cache(queue, true, false);
|
||||
|
||||
priv->gendisk = gendisk;
|
||||
gendisk->major = ps3disk_major;
|
||||
gendisk->first_minor = devidx * PS3DISK_MINORS;
|
||||
|
@ -4949,14 +4949,12 @@ static const struct blk_mq_ops rbd_mq_ops = {
|
||||
static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
{
|
||||
struct gendisk *disk;
|
||||
struct request_queue *q;
|
||||
unsigned int objset_bytes =
|
||||
rbd_dev->layout.object_size * rbd_dev->layout.stripe_count;
|
||||
struct queue_limits lim = {
|
||||
.max_hw_sectors = objset_bytes >> SECTOR_SHIFT,
|
||||
.max_user_sectors = objset_bytes >> SECTOR_SHIFT,
|
||||
.io_opt = objset_bytes,
|
||||
.io_min = rbd_dev->opts->alloc_size,
|
||||
.io_opt = rbd_dev->opts->alloc_size,
|
||||
.max_segments = USHRT_MAX,
|
||||
.max_segment_size = UINT_MAX,
|
||||
};
|
||||
@ -4980,12 +4978,14 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
lim.max_write_zeroes_sectors = objset_bytes >> SECTOR_SHIFT;
|
||||
}
|
||||
|
||||
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
|
||||
lim.features |= BLK_FEAT_STABLE_WRITES;
|
||||
|
||||
disk = blk_mq_alloc_disk(&rbd_dev->tag_set, &lim, rbd_dev);
|
||||
if (IS_ERR(disk)) {
|
||||
err = PTR_ERR(disk);
|
||||
goto out_tag_set;
|
||||
}
|
||||
q = disk->queue;
|
||||
|
||||
snprintf(disk->disk_name, sizeof(disk->disk_name), RBD_DRV_NAME "%d",
|
||||
rbd_dev->dev_id);
|
||||
@ -4997,13 +4997,6 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
||||
disk->minors = RBD_MINORS_PER_MAJOR;
|
||||
disk->fops = &rbd_bd_ops;
|
||||
disk->private_data = rbd_dev;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
/* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
|
||||
|
||||
if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC))
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
|
||||
rbd_dev->disk = disk;
|
||||
|
||||
return 0;
|
||||
|
@ -475,7 +475,7 @@ void rnbd_clt_remove_dev_symlink(struct rnbd_clt_dev *dev)
|
||||
}
|
||||
}
|
||||
|
||||
static struct kobj_type rnbd_dev_ktype = {
|
||||
static const struct kobj_type rnbd_dev_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_groups = rnbd_dev_groups,
|
||||
};
|
||||
|
@ -1352,10 +1352,6 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
|
||||
if (dev->access_mode == RNBD_ACCESS_RO)
|
||||
set_disk_ro(dev->gd, true);
|
||||
|
||||
/*
|
||||
* Network device does not need rotational
|
||||
*/
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, dev->queue);
|
||||
err = add_disk(dev->gd);
|
||||
if (err)
|
||||
put_disk(dev->gd);
|
||||
@ -1389,18 +1385,18 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
|
||||
le32_to_cpu(rsp->max_discard_sectors);
|
||||
}
|
||||
|
||||
if (rsp->cache_policy & RNBD_WRITEBACK) {
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
if (rsp->cache_policy & RNBD_FUA)
|
||||
lim.features |= BLK_FEAT_FUA;
|
||||
}
|
||||
|
||||
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, &lim, dev);
|
||||
if (IS_ERR(dev->gd))
|
||||
return PTR_ERR(dev->gd);
|
||||
dev->queue = dev->gd->queue;
|
||||
rnbd_init_mq_hw_queues(dev);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
|
||||
blk_queue_write_cache(dev->queue,
|
||||
!!(rsp->cache_policy & RNBD_WRITEBACK),
|
||||
!!(rsp->cache_policy & RNBD_FUA));
|
||||
|
||||
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
|
||||
}
|
||||
|
||||
|
@ -33,7 +33,7 @@ static void rnbd_srv_dev_release(struct kobject *kobj)
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static struct kobj_type dev_ktype = {
|
||||
static const struct kobj_type dev_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.release = rnbd_srv_dev_release
|
||||
};
|
||||
@ -184,7 +184,7 @@ static void rnbd_srv_sess_dev_release(struct kobject *kobj)
|
||||
rnbd_destroy_sess_dev(sess_dev, sess_dev->keep_id);
|
||||
}
|
||||
|
||||
static struct kobj_type rnbd_srv_sess_dev_ktype = {
|
||||
static const struct kobj_type rnbd_srv_sess_dev_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.release = rnbd_srv_sess_dev_release,
|
||||
};
|
||||
|
73
drivers/block/rnull.rs
Normal file
73
drivers/block/rnull.rs
Normal file
@ -0,0 +1,73 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
//! This is a Rust implementation of the C null block driver.
|
||||
//!
|
||||
//! Supported features:
|
||||
//!
|
||||
//! - blk-mq interface
|
||||
//! - direct completion
|
||||
//! - block size 4k
|
||||
//!
|
||||
//! The driver is not configurable.
|
||||
|
||||
use kernel::{
|
||||
alloc::flags,
|
||||
block::mq::{
|
||||
self,
|
||||
gen_disk::{self, GenDisk},
|
||||
Operations, TagSet,
|
||||
},
|
||||
error::Result,
|
||||
new_mutex, pr_info,
|
||||
prelude::*,
|
||||
sync::{Arc, Mutex},
|
||||
types::ARef,
|
||||
};
|
||||
|
||||
module! {
|
||||
type: NullBlkModule,
|
||||
name: "rnull_mod",
|
||||
author: "Andreas Hindborg",
|
||||
license: "GPL v2",
|
||||
}
|
||||
|
||||
struct NullBlkModule {
|
||||
_disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>,
|
||||
}
|
||||
|
||||
impl kernel::Module for NullBlkModule {
|
||||
fn init(_module: &'static ThisModule) -> Result<Self> {
|
||||
pr_info!("Rust null_blk loaded\n");
|
||||
let tagset = Arc::pin_init(TagSet::new(1, 256, 1), flags::GFP_KERNEL)?;
|
||||
|
||||
let disk = gen_disk::GenDiskBuilder::new()
|
||||
.capacity_sectors(4096 << 11)
|
||||
.logical_block_size(4096)?
|
||||
.physical_block_size(4096)?
|
||||
.rotational(false)
|
||||
.build(format_args!("rnullb{}", 0), tagset)?;
|
||||
|
||||
let disk = Box::pin_init(new_mutex!(disk, "nullb:disk"), flags::GFP_KERNEL)?;
|
||||
|
||||
Ok(Self { _disk: disk })
|
||||
}
|
||||
}
|
||||
|
||||
struct NullBlkDevice;
|
||||
|
||||
#[vtable]
|
||||
impl Operations for NullBlkDevice {
|
||||
#[inline(always)]
|
||||
fn queue_rq(rq: ARef<mq::Request<Self>>, _is_last: bool) -> Result {
|
||||
mq::Request::end_ok(rq)
|
||||
.map_err(|_e| kernel::error::code::EIO)
|
||||
// We take no refcounts on the request, so we expect to be able to
|
||||
// end the request. The request reference must be unique at this
|
||||
// point, and so `end_ok` cannot fail.
|
||||
.expect("Fatal error - expected to be able to end request");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn commit_rqs() {}
|
||||
}
|
@ -791,6 +791,7 @@ static int probe_disk(struct vdc_port *port)
|
||||
.seg_boundary_mask = PAGE_SIZE - 1,
|
||||
.max_segment_size = PAGE_SIZE,
|
||||
.max_segments = port->ring_cookies,
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
struct request_queue *q;
|
||||
struct gendisk *g;
|
||||
|
@ -787,6 +787,9 @@ static void swim_cleanup_floppy_disk(struct floppy_state *fs)
|
||||
|
||||
static int swim_floppy_init(struct swim_priv *swd)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
int err;
|
||||
int drive;
|
||||
struct swim __iomem *base = swd->base;
|
||||
@ -820,7 +823,7 @@ static int swim_floppy_init(struct swim_priv *swd)
|
||||
goto exit_put_disks;
|
||||
|
||||
swd->unit[drive].disk =
|
||||
blk_mq_alloc_disk(&swd->unit[drive].tag_set, NULL,
|
||||
blk_mq_alloc_disk(&swd->unit[drive].tag_set, &lim,
|
||||
&swd->unit[drive]);
|
||||
if (IS_ERR(swd->unit[drive].disk)) {
|
||||
blk_mq_free_tag_set(&swd->unit[drive].tag_set);
|
||||
|
@ -1189,6 +1189,9 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
|
||||
static int swim3_attach(struct macio_dev *mdev,
|
||||
const struct of_device_id *match)
|
||||
{
|
||||
struct queue_limits lim = {
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
struct floppy_state *fs;
|
||||
struct gendisk *disk;
|
||||
int rc;
|
||||
@ -1210,7 +1213,7 @@ static int swim3_attach(struct macio_dev *mdev,
|
||||
if (rc)
|
||||
goto out_unregister;
|
||||
|
||||
disk = blk_mq_alloc_disk(&fs->tag_set, NULL, fs);
|
||||
disk = blk_mq_alloc_disk(&fs->tag_set, &lim, fs);
|
||||
if (IS_ERR(disk)) {
|
||||
rc = PTR_ERR(disk);
|
||||
goto out_free_tag_set;
|
||||
|
@ -248,8 +248,6 @@ static int ublk_dev_param_zoned_validate(const struct ublk_device *ub)
|
||||
|
||||
static void ublk_dev_param_zoned_apply(struct ublk_device *ub)
|
||||
{
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ub->ub_disk->queue);
|
||||
|
||||
ub->ub_disk->nr_zones = ublk_get_nr_zones(ub);
|
||||
}
|
||||
|
||||
@ -484,16 +482,8 @@ static inline unsigned ublk_pos_to_tag(loff_t pos)
|
||||
|
||||
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
{
|
||||
struct request_queue *q = ub->ub_disk->queue;
|
||||
const struct ublk_param_basic *p = &ub->params.basic;
|
||||
|
||||
blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
|
||||
p->attrs & UBLK_ATTR_FUA);
|
||||
if (p->attrs & UBLK_ATTR_ROTATIONAL)
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
if (p->attrs & UBLK_ATTR_READ_ONLY)
|
||||
set_disk_ro(ub->ub_disk, true);
|
||||
|
||||
@ -2204,12 +2194,21 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
lim.zoned = true;
|
||||
lim.features |= BLK_FEAT_ZONED;
|
||||
lim.max_active_zones = p->max_active_zones;
|
||||
lim.max_open_zones = p->max_open_zones;
|
||||
lim.max_zone_append_sectors = p->max_zone_append_sectors;
|
||||
}
|
||||
|
||||
if (ub->params.basic.attrs & UBLK_ATTR_VOLATILE_CACHE) {
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
if (ub->params.basic.attrs & UBLK_ATTR_FUA)
|
||||
lim.features |= BLK_FEAT_FUA;
|
||||
}
|
||||
|
||||
if (ub->params.basic.attrs & UBLK_ATTR_ROTATIONAL)
|
||||
lim.features |= BLK_FEAT_ROTATIONAL;
|
||||
|
||||
if (wait_for_completion_interruptible(&ub->completion) != 0)
|
||||
return -EINTR;
|
||||
|
||||
@ -3017,4 +3016,5 @@ module_param_cb(ublks_max, &ublk_max_ublks_ops, &ublks_max, 0644);
|
||||
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
|
||||
|
||||
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
|
||||
MODULE_DESCRIPTION("Userspace block device");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
|
||||
|
||||
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
|
||||
|
||||
lim->zoned = true;
|
||||
lim->features |= BLK_FEAT_ZONED;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
zoned.max_open_zones, &v);
|
||||
@ -1089,14 +1089,6 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev)
|
||||
return writeback;
|
||||
}
|
||||
|
||||
static void virtblk_update_cache_mode(struct virtio_device *vdev)
|
||||
{
|
||||
u8 writeback = virtblk_get_cache_mode(vdev);
|
||||
struct virtio_blk *vblk = vdev->priv;
|
||||
|
||||
blk_queue_write_cache(vblk->disk->queue, writeback, false);
|
||||
}
|
||||
|
||||
static const char *const virtblk_cache_types[] = {
|
||||
"write through", "write back"
|
||||
};
|
||||
@ -1108,6 +1100,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
|
||||
struct gendisk *disk = dev_to_disk(dev);
|
||||
struct virtio_blk *vblk = disk->private_data;
|
||||
struct virtio_device *vdev = vblk->vdev;
|
||||
struct queue_limits lim;
|
||||
int i;
|
||||
|
||||
BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
|
||||
@ -1116,7 +1109,17 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
|
||||
return i;
|
||||
|
||||
virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
|
||||
virtblk_update_cache_mode(vdev);
|
||||
|
||||
lim = queue_limits_start_update(disk->queue);
|
||||
if (virtblk_get_cache_mode(vdev))
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
else
|
||||
lim.features &= ~BLK_FEAT_WRITE_CACHE;
|
||||
blk_mq_freeze_queue(disk->queue);
|
||||
i = queue_limits_commit_update(disk->queue, &lim);
|
||||
blk_mq_unfreeze_queue(disk->queue);
|
||||
if (i)
|
||||
return i;
|
||||
return count;
|
||||
}
|
||||
|
||||
@ -1247,7 +1250,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct virtio_device *vdev = vblk->vdev;
|
||||
u32 v, blk_size, max_size, sg_elems, opt_io_size;
|
||||
u32 v, max_size, sg_elems, opt_io_size;
|
||||
u32 max_discard_segs = 0;
|
||||
u32 discard_granularity = 0;
|
||||
u16 min_io_size;
|
||||
@ -1286,46 +1289,36 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
|
||||
lim->max_segment_size = max_size;
|
||||
|
||||
/* Host can optionally specify the block size of the device */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
|
||||
virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
|
||||
struct virtio_blk_config, blk_size,
|
||||
&blk_size);
|
||||
if (!err) {
|
||||
err = blk_validate_block_size(blk_size);
|
||||
if (err) {
|
||||
dev_err(&vdev->dev,
|
||||
"virtio_blk: invalid block size: 0x%x\n",
|
||||
blk_size);
|
||||
return err;
|
||||
}
|
||||
|
||||
lim->logical_block_size = blk_size;
|
||||
} else
|
||||
blk_size = lim->logical_block_size;
|
||||
&lim->logical_block_size);
|
||||
|
||||
/* Use topology information if available */
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, physical_block_exp,
|
||||
&physical_block_exp);
|
||||
if (!err && physical_block_exp)
|
||||
lim->physical_block_size = blk_size * (1 << physical_block_exp);
|
||||
lim->physical_block_size =
|
||||
lim->logical_block_size * (1 << physical_block_exp);
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, alignment_offset,
|
||||
&alignment_offset);
|
||||
if (!err && alignment_offset)
|
||||
lim->alignment_offset = blk_size * alignment_offset;
|
||||
lim->alignment_offset =
|
||||
lim->logical_block_size * alignment_offset;
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, min_io_size,
|
||||
&min_io_size);
|
||||
if (!err && min_io_size)
|
||||
lim->io_min = blk_size * min_io_size;
|
||||
lim->io_min = lim->logical_block_size * min_io_size;
|
||||
|
||||
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
|
||||
struct virtio_blk_config, opt_io_size,
|
||||
&opt_io_size);
|
||||
if (!err && opt_io_size)
|
||||
lim->io_opt = blk_size * opt_io_size;
|
||||
lim->io_opt = lim->logical_block_size * opt_io_size;
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
@ -1419,7 +1412,7 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
|
||||
lim->discard_granularity =
|
||||
discard_granularity << SECTOR_SHIFT;
|
||||
else
|
||||
lim->discard_granularity = blk_size;
|
||||
lim->discard_granularity = lim->logical_block_size;
|
||||
}
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_ZONED)) {
|
||||
@ -1448,7 +1441,10 @@ static int virtblk_read_limits(struct virtio_blk *vblk,
|
||||
static int virtblk_probe(struct virtio_device *vdev)
|
||||
{
|
||||
struct virtio_blk *vblk;
|
||||
struct queue_limits lim = { };
|
||||
struct queue_limits lim = {
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
.logical_block_size = SECTOR_SIZE,
|
||||
};
|
||||
int err, index;
|
||||
unsigned int queue_depth;
|
||||
|
||||
@ -1512,6 +1508,9 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
if (err)
|
||||
goto out_free_tags;
|
||||
|
||||
if (virtblk_get_cache_mode(vdev))
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
|
||||
vblk->disk = blk_mq_alloc_disk(&vblk->tag_set, &lim, vblk);
|
||||
if (IS_ERR(vblk->disk)) {
|
||||
err = PTR_ERR(vblk->disk);
|
||||
@ -1527,9 +1526,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
vblk->disk->fops = &virtblk_fops;
|
||||
vblk->index = index;
|
||||
|
||||
/* configure queue flush support */
|
||||
virtblk_update_cache_mode(vdev);
|
||||
|
||||
/* If disk is read-only in the host, the guest should obey */
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
|
||||
set_disk_ro(vblk->disk, 1);
|
||||
@ -1541,8 +1537,8 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
* All steps that follow use the VQs therefore they need to be
|
||||
* placed after the virtio_device_ready() call above.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && lim.zoned) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, vblk->disk->queue);
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
|
||||
(lim.features & BLK_FEAT_ZONED)) {
|
||||
err = blk_revalidate_disk_zones(vblk->disk);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
@ -1563,5 +1563,6 @@ static void __exit xen_blkif_fini(void)
|
||||
|
||||
module_exit(xen_blkif_fini);
|
||||
|
||||
MODULE_DESCRIPTION("Virtual block device back-end driver");
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_ALIAS("xen-backend:vbd");
|
||||
|
@ -788,6 +788,11 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
|
||||
* A barrier request a superset of FUA, so we can
|
||||
* implement it the same way. (It's also a FLUSH+FUA,
|
||||
* since it is guaranteed ordered WRT previous writes.)
|
||||
*
|
||||
* Note that can end up here with a FUA write and the
|
||||
* flags cleared. This happens when the flag was
|
||||
* run-time disabled after a failing I/O, and we'll
|
||||
* simplify submit it as a normal write.
|
||||
*/
|
||||
if (info->feature_flush && info->feature_fua)
|
||||
ring_req->operation =
|
||||
@ -795,8 +800,6 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
|
||||
else if (info->feature_flush)
|
||||
ring_req->operation =
|
||||
BLKIF_OP_FLUSH_DISKCACHE;
|
||||
else
|
||||
ring_req->operation = 0;
|
||||
}
|
||||
ring_req->u.rw.nr_segments = num_grant;
|
||||
if (unlikely(require_extra_req)) {
|
||||
@ -887,16 +890,6 @@ static inline void flush_requests(struct blkfront_ring_info *rinfo)
|
||||
notify_remote_via_irq(rinfo->irq);
|
||||
}
|
||||
|
||||
static inline bool blkif_request_flush_invalid(struct request *req,
|
||||
struct blkfront_info *info)
|
||||
{
|
||||
return (blk_rq_is_passthrough(req) ||
|
||||
((req_op(req) == REQ_OP_FLUSH) &&
|
||||
!info->feature_flush) ||
|
||||
((req->cmd_flags & REQ_FUA) &&
|
||||
!info->feature_fua));
|
||||
}
|
||||
|
||||
static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *qd)
|
||||
{
|
||||
@ -908,12 +901,22 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
rinfo = get_rinfo(info, qid);
|
||||
blk_mq_start_request(qd->rq);
|
||||
spin_lock_irqsave(&rinfo->ring_lock, flags);
|
||||
|
||||
/*
|
||||
* Check if the backend actually supports flushes.
|
||||
*
|
||||
* While the block layer won't send us flushes if we don't claim to
|
||||
* support them, the Xen protocol allows the backend to revoke support
|
||||
* at any time. That is of course a really bad idea and dangerous, but
|
||||
* has been allowed for 10+ years. In that case we simply clear the
|
||||
* flags, and directly return here for an empty flush and ignore the
|
||||
* FUA flag later on.
|
||||
*/
|
||||
if (unlikely(req_op(qd->rq) == REQ_OP_FLUSH && !info->feature_flush))
|
||||
goto complete;
|
||||
|
||||
if (RING_FULL(&rinfo->ring))
|
||||
goto out_busy;
|
||||
|
||||
if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
|
||||
goto out_err;
|
||||
|
||||
if (blkif_queue_request(qd->rq, rinfo))
|
||||
goto out_busy;
|
||||
|
||||
@ -921,14 +924,14 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
|
||||
return BLK_STS_OK;
|
||||
|
||||
out_err:
|
||||
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
out_busy:
|
||||
blk_mq_stop_hw_queue(hctx);
|
||||
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
|
||||
return BLK_STS_DEV_RESOURCE;
|
||||
complete:
|
||||
spin_unlock_irqrestore(&rinfo->ring_lock, flags);
|
||||
blk_mq_end_request(qd->rq, BLK_STS_OK);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
static void blkif_complete_rq(struct request *rq)
|
||||
@ -956,6 +959,12 @@ static void blkif_set_queue_limits(const struct blkfront_info *info,
|
||||
lim->max_secure_erase_sectors = UINT_MAX;
|
||||
}
|
||||
|
||||
if (info->feature_flush) {
|
||||
lim->features |= BLK_FEAT_WRITE_CACHE;
|
||||
if (info->feature_fua)
|
||||
lim->features |= BLK_FEAT_FUA;
|
||||
}
|
||||
|
||||
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
||||
lim->logical_block_size = info->sector_size;
|
||||
lim->physical_block_size = info->physical_sector_size;
|
||||
@ -984,8 +993,6 @@ static const char *flush_info(struct blkfront_info *info)
|
||||
|
||||
static void xlvbd_flush(struct blkfront_info *info)
|
||||
{
|
||||
blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
|
||||
info->feature_fua ? true : false);
|
||||
pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
|
||||
info->gd->disk_name, flush_info(info),
|
||||
"persistent grants:", info->feature_persistent ?
|
||||
@ -1063,8 +1070,7 @@ static char *encode_disk_name(char *ptr, unsigned int n)
|
||||
}
|
||||
|
||||
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
struct blkfront_info *info, u16 sector_size,
|
||||
unsigned int physical_sector_size)
|
||||
struct blkfront_info *info)
|
||||
{
|
||||
struct queue_limits lim = {};
|
||||
struct gendisk *gd;
|
||||
@ -1139,7 +1145,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
err = PTR_ERR(gd);
|
||||
goto out_free_tag_set;
|
||||
}
|
||||
blk_queue_flag_set(QUEUE_FLAG_VIRT, gd->queue);
|
||||
|
||||
strcpy(gd->disk_name, DEV_NAME);
|
||||
ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
|
||||
@ -1159,8 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
|
||||
|
||||
info->rq = gd->queue;
|
||||
info->gd = gd;
|
||||
info->sector_size = sector_size;
|
||||
info->physical_sector_size = physical_sector_size;
|
||||
|
||||
xlvbd_flush(info);
|
||||
|
||||
@ -1605,8 +1608,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
blkif_req(req)->error = BLK_STS_NOTSUPP;
|
||||
info->feature_discard = 0;
|
||||
info->feature_secdiscard = 0;
|
||||
blk_queue_max_discard_sectors(rq, 0);
|
||||
blk_queue_max_secure_erase_sectors(rq, 0);
|
||||
blk_queue_disable_discard(rq);
|
||||
blk_queue_disable_secure_erase(rq);
|
||||
}
|
||||
break;
|
||||
case BLKIF_OP_FLUSH_DISKCACHE:
|
||||
@ -1627,7 +1630,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
|
||||
blkif_req(req)->error = BLK_STS_OK;
|
||||
info->feature_fua = 0;
|
||||
info->feature_flush = 0;
|
||||
xlvbd_flush(info);
|
||||
}
|
||||
fallthrough;
|
||||
case BLKIF_OP_READ:
|
||||
@ -2315,8 +2317,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
|
||||
static void blkfront_connect(struct blkfront_info *info)
|
||||
{
|
||||
unsigned long long sectors;
|
||||
unsigned long sector_size;
|
||||
unsigned int physical_sector_size;
|
||||
int err, i;
|
||||
struct blkfront_ring_info *rinfo;
|
||||
|
||||
@ -2355,7 +2355,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
||||
"sectors", "%llu", §ors,
|
||||
"info", "%u", &info->vdisk_info,
|
||||
"sector-size", "%lu", §or_size,
|
||||
"sector-size", "%lu", &info->sector_size,
|
||||
NULL);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err,
|
||||
@ -2369,9 +2369,9 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
* provide this. Assume physical sector size to be the same as
|
||||
* sector_size in that case.
|
||||
*/
|
||||
physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
|
||||
info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
|
||||
"physical-sector-size",
|
||||
sector_size);
|
||||
info->sector_size);
|
||||
blkfront_gather_backend_features(info);
|
||||
for_each_rinfo(info, rinfo, i) {
|
||||
err = blkfront_setup_indirect(rinfo);
|
||||
@ -2383,8 +2383,7 @@ static void blkfront_connect(struct blkfront_info *info)
|
||||
}
|
||||
}
|
||||
|
||||
err = xlvbd_alloc_gendisk(sectors, info, sector_size,
|
||||
physical_sector_size);
|
||||
err = xlvbd_alloc_gendisk(sectors, info);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
||||
info->xbdev->otherend);
|
||||
|
@ -409,4 +409,5 @@ static void __exit z2_exit(void)
|
||||
|
||||
module_init(z2_init);
|
||||
module_exit(z2_exit);
|
||||
MODULE_DESCRIPTION("Amiga Zorro II ramdisk driver");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -2208,6 +2208,8 @@ static int zram_add(void)
|
||||
#if ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE
|
||||
.max_write_zeroes_sectors = UINT_MAX,
|
||||
#endif
|
||||
.features = BLK_FEAT_STABLE_WRITES |
|
||||
BLK_FEAT_SYNCHRONOUS,
|
||||
};
|
||||
struct zram *zram;
|
||||
int ret, device_id;
|
||||
@ -2245,10 +2247,6 @@ static int zram_add(void)
|
||||
|
||||
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize */
|
||||
set_capacity(zram->disk, 0);
|
||||
/* zram devices sort of resembles non-rotational disks */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, zram->disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
|
||||
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
|
||||
if (ret)
|
||||
goto out_cleanup_disk;
|
||||
|
@ -3708,4 +3708,5 @@ static void __exit cdrom_exit(void)
|
||||
|
||||
module_init(cdrom_init);
|
||||
module_exit(cdrom_exit);
|
||||
MODULE_DESCRIPTION("Uniform CD-ROM driver");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@ -744,6 +744,7 @@ static int probe_gdrom(struct platform_device *devptr)
|
||||
.max_segments = 1,
|
||||
/* set a large max size to get most from DMA */
|
||||
.max_segment_size = 0x40000,
|
||||
.features = BLK_FEAT_ROTATIONAL,
|
||||
};
|
||||
int err;
|
||||
|
||||
|
@ -897,7 +897,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
sector_t sectors, struct block_device *cached_bdev,
|
||||
const struct block_device_operations *ops)
|
||||
{
|
||||
struct request_queue *q;
|
||||
const size_t max_stripes = min_t(size_t, INT_MAX,
|
||||
SIZE_MAX / sizeof(atomic_t));
|
||||
struct queue_limits lim = {
|
||||
@ -909,6 +908,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
.io_min = block_size,
|
||||
.logical_block_size = block_size,
|
||||
.physical_block_size = block_size,
|
||||
.features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA,
|
||||
};
|
||||
uint64_t n;
|
||||
int idx;
|
||||
@ -974,13 +974,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
|
||||
d->disk->minors = BCACHE_MINORS;
|
||||
d->disk->fops = ops;
|
||||
d->disk->private_data = d;
|
||||
|
||||
q = d->disk->queue;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, d->disk->queue);
|
||||
|
||||
blk_queue_write_cache(q, true, true);
|
||||
|
||||
return 0;
|
||||
|
||||
out_bioset_exit:
|
||||
@ -1423,8 +1416,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
|
||||
}
|
||||
|
||||
if (bdev_io_opt(dc->bdev))
|
||||
dc->partial_stripes_expensive =
|
||||
q->limits.raid_partial_stripes_expensive;
|
||||
dc->partial_stripes_expensive = !!(q->limits.features &
|
||||
BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
|
||||
|
||||
ret = bcache_device_init(&dc->disk, block_size,
|
||||
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
|
||||
|
@ -3403,7 +3403,6 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
|
||||
limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
|
||||
limits->discard_granularity = origin_limits->discard_granularity;
|
||||
limits->discard_alignment = origin_limits->discard_alignment;
|
||||
limits->discard_misaligned = origin_limits->discard_misaligned;
|
||||
}
|
||||
|
||||
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
|
@ -2059,7 +2059,6 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits)
|
||||
limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
|
||||
limits->discard_granularity = dest_limits->discard_granularity;
|
||||
limits->discard_alignment = dest_limits->discard_alignment;
|
||||
limits->discard_misaligned = dest_limits->discard_misaligned;
|
||||
limits->max_discard_segments = dest_limits->max_discard_segments;
|
||||
}
|
||||
|
||||
|
@ -206,7 +206,6 @@ struct dm_table {
|
||||
|
||||
bool integrity_supported:1;
|
||||
bool singleton:1;
|
||||
unsigned integrity_added:1;
|
||||
|
||||
/*
|
||||
* Indicates the rw permissions for the new logical device. This
|
||||
|
@ -1176,8 +1176,8 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti)
|
||||
struct blk_integrity *bi = blk_get_integrity(cc->dev->bdev->bd_disk);
|
||||
struct mapped_device *md = dm_table_get_md(ti->table);
|
||||
|
||||
/* From now we require underlying device with our integrity profile */
|
||||
if (!bi || strcasecmp(bi->profile->name, "DM-DIF-EXT-TAG")) {
|
||||
/* We require an underlying device with non-PI metadata */
|
||||
if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) {
|
||||
ti->error = "Integrity profile not supported.";
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -350,25 +350,6 @@ static struct kmem_cache *journal_io_cache;
#define DEBUG_bytes(bytes, len, msg, ...) do { } while (0)
#endif

static void dm_integrity_prepare(struct request *rq)
{
}

static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
{
}

/*
 * DM Integrity profile, protection is performed layer above (dm-crypt)
 */
static const struct blk_integrity_profile dm_integrity_profile = {
	.name = "DM-DIF-EXT-TAG",
	.generate_fn = NULL,
	.verify_fn = NULL,
	.prepare_fn = dm_integrity_prepare,
	.complete_fn = dm_integrity_complete,
};

static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
static void integrity_bio_wait(struct work_struct *w);
static void dm_integrity_dtr(struct dm_target *ti);
@@ -3494,6 +3475,17 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim
		limits->dma_alignment = limits->logical_block_size - 1;
		limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;
	}

	if (!ic->internal_hash) {
		struct blk_integrity *bi = &limits->integrity;

		memset(bi, 0, sizeof(*bi));
		bi->tuple_size = ic->tag_size;
		bi->tag_size = bi->tuple_size;
		bi->interval_exp =
			ic->sb->log2_sectors_per_block + SECTOR_SHIFT;
	}

	limits->max_integrity_segments = USHRT_MAX;
}

@@ -3650,20 +3642,6 @@ try_smaller_buffer:
	return 0;
}

static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic)
{
	struct gendisk *disk = dm_disk(dm_table_get_md(ti->table));
	struct blk_integrity bi;

	memset(&bi, 0, sizeof(bi));
	bi.profile = &dm_integrity_profile;
	bi.tuple_size = ic->tag_size;
	bi.tag_size = bi.tuple_size;
	bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT;

	blk_integrity_register(disk, &bi);
}

static void dm_integrity_free_page_list(struct page_list *pl)
{
	unsigned int i;
@@ -4649,9 +4627,6 @@ try_smaller_buffer:
		}
	}

	if (!ic->internal_hash)
		dm_integrity_set(ti, ic);

	ti->num_flush_bios = 1;
	ti->flush_supported = true;
	if (ic->discard)
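The dm-integrity hunks above drop the standalone blk_integrity_profile registration and instead describe the metadata format directly in limits->integrity through tuple_size and interval_exp. A small arithmetic sketch of what those two fields imply for an I/O, under assumed example values (userspace, not kernel code):

```c
/* tuple_size tag bytes are carried per 2^interval_exp data bytes. */
#include <stdio.h>

#define SECTOR_SHIFT 9

int main(void)
{
	unsigned int log2_sectors_per_block = 3;	/* e.g. 4 KiB integrity blocks */
	unsigned int tuple_size = 4;			/* tag bytes per block (assumed) */
	unsigned int interval_exp = log2_sectors_per_block + SECTOR_SHIFT;
	unsigned long io_bytes = 1024 * 1024;		/* a 1 MiB write */

	unsigned long intervals = io_bytes >> interval_exp;
	unsigned long tag_bytes = intervals * tuple_size;

	printf("interval: %u bytes, intervals per IO: %lu, tag bytes: %lu\n",
	       1u << interval_exp, intervals, tag_bytes);
	return 0;
}
```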
@@ -3542,7 +3542,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
	recovery = rs->md.recovery;
	state = decipher_sync_action(mddev, recovery);
	progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
	resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
	resync_mismatches = mddev->last_sync_action == ACTION_CHECK ?
			    atomic64_read(&mddev->resync_mismatches) : 0;

	/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
@@ -425,6 +425,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
		       q->limits.logical_block_size,
		       q->limits.alignment_offset,
		       (unsigned long long) start << SECTOR_SHIFT);

	/*
	 * Only stack the integrity profile if the target doesn't have native
	 * integrity support.
	 */
	if (!dm_target_has_integrity(ti->type))
		queue_limits_stack_integrity_bdev(limits, bdev);
	return 0;
}

@@ -572,6 +579,12 @@ int dm_split_args(int *argc, char ***argvp, char *input)
	return 0;
}

static void dm_set_stacking_limits(struct queue_limits *limits)
{
	blk_set_stacking_limits(limits);
	limits->features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | BLK_FEAT_POLL;
}

/*
 * Impose necessary and sufficient conditions on a devices's table such
 * that any incoming bio which respects its logical_block_size can be
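dm_set_stacking_limits() above seeds a table's limits with optimistic feature bits (IO_STAT, NOWAIT, POLL) that later checks in this series only withdraw when a target or component device cannot honour them. A simplified, assumption-level model of that start-permissive-then-clear pattern follows (two invented components, invented flag values; the real blk_set_stacking_limits()/queue limits stacking rules are more nuanced):

```c
/* Start with permissive defaults, drop whatever one component cannot do. */
#include <stdint.h>
#include <stdio.h>

#define FEAT_IO_STAT (1u << 0)
#define FEAT_NOWAIT  (1u << 1)
#define FEAT_POLL    (1u << 2)

static uint32_t set_stacking_defaults(void)
{
	return FEAT_IO_STAT | FEAT_NOWAIT | FEAT_POLL;
}

int main(void)
{
	uint32_t table_features = set_stacking_defaults();
	uint32_t component_features[] = {
		FEAT_IO_STAT | FEAT_NOWAIT,		/* no polling support */
		FEAT_IO_STAT | FEAT_NOWAIT | FEAT_POLL,
	};

	for (unsigned int i = 0; i < 2; i++)
		table_features &= component_features[i];

	printf("stacked features: io_stat=%d nowait=%d poll=%d\n",
	       !!(table_features & FEAT_IO_STAT),
	       !!(table_features & FEAT_NOWAIT),
	       !!(table_features & FEAT_POLL));
	return 0;
}
```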
@ -610,7 +623,7 @@ static int validate_hardware_logical_block_alignment(struct dm_table *t,
|
||||
for (i = 0; i < t->num_targets; i++) {
|
||||
ti = dm_table_get_target(t, i);
|
||||
|
||||
blk_set_stacking_limits(&ti_limits);
|
||||
dm_set_stacking_limits(&ti_limits);
|
||||
|
||||
/* combine all target devices' limits */
|
||||
if (ti->type->iterate_devices)
|
||||
@ -702,9 +715,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
|
||||
t->immutable_target_type = ti->type;
|
||||
}
|
||||
|
||||
if (dm_target_has_integrity(ti->type))
|
||||
t->integrity_added = 1;
|
||||
|
||||
ti->table = t;
|
||||
ti->begin = start;
|
||||
ti->len = len;
|
||||
@ -1014,14 +1024,13 @@ bool dm_table_request_based(struct dm_table *t)
|
||||
return __table_type_request_based(dm_table_get_type(t));
|
||||
}
|
||||
|
||||
static bool dm_table_supports_poll(struct dm_table *t);
|
||||
|
||||
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
|
||||
{
|
||||
enum dm_queue_mode type = dm_table_get_type(t);
|
||||
unsigned int per_io_data_size = 0, front_pad, io_front_pad;
|
||||
unsigned int min_pool_size = 0, pool_size;
|
||||
struct dm_md_mempools *pools;
|
||||
unsigned int bioset_flags = 0;
|
||||
|
||||
if (unlikely(type == DM_TYPE_NONE)) {
|
||||
DMERR("no table type is set, can't allocate mempools");
|
||||
@ -1038,6 +1047,9 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
|
||||
goto init_bs;
|
||||
}
|
||||
|
||||
if (md->queue->limits.features & BLK_FEAT_POLL)
|
||||
bioset_flags |= BIOSET_PERCPU_CACHE;
|
||||
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
@ -1050,8 +1062,7 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
|
||||
|
||||
io_front_pad = roundup(per_io_data_size,
|
||||
__alignof__(struct dm_io)) + DM_IO_BIO_OFFSET;
|
||||
if (bioset_init(&pools->io_bs, pool_size, io_front_pad,
|
||||
dm_table_supports_poll(t) ? BIOSET_PERCPU_CACHE : 0))
|
||||
if (bioset_init(&pools->io_bs, pool_size, io_front_pad, bioset_flags))
|
||||
goto out_free_pools;
|
||||
if (t->integrity_supported &&
|
||||
bioset_integrity_create(&pools->io_bs, pool_size))
|
||||
@ -1119,99 +1130,6 @@ static int dm_table_build_index(struct dm_table *t)
|
||||
return r;
|
||||
}
|
||||
|
||||
static bool integrity_profile_exists(struct gendisk *disk)
|
||||
{
|
||||
return !!blk_get_integrity(disk);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a disk whose integrity profile reflects the table's profile.
|
||||
* Returns NULL if integrity support was inconsistent or unavailable.
|
||||
*/
|
||||
static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t)
|
||||
{
|
||||
struct list_head *devices = dm_table_get_devices(t);
|
||||
struct dm_dev_internal *dd = NULL;
|
||||
struct gendisk *prev_disk = NULL, *template_disk = NULL;
|
||||
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!dm_target_passes_integrity(ti->type))
|
||||
goto no_integrity;
|
||||
}
|
||||
|
||||
list_for_each_entry(dd, devices, list) {
|
||||
template_disk = dd->dm_dev->bdev->bd_disk;
|
||||
if (!integrity_profile_exists(template_disk))
|
||||
goto no_integrity;
|
||||
else if (prev_disk &&
|
||||
blk_integrity_compare(prev_disk, template_disk) < 0)
|
||||
goto no_integrity;
|
||||
prev_disk = template_disk;
|
||||
}
|
||||
|
||||
return template_disk;
|
||||
|
||||
no_integrity:
|
||||
if (prev_disk)
|
||||
DMWARN("%s: integrity not set: %s and %s profile mismatch",
|
||||
dm_device_name(t->md),
|
||||
prev_disk->disk_name,
|
||||
template_disk->disk_name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Register the mapped device for blk_integrity support if the
|
||||
* underlying devices have an integrity profile. But all devices may
|
||||
* not have matching profiles (checking all devices isn't reliable
|
||||
* during table load because this table may use other DM device(s) which
|
||||
* must be resumed before they will have an initialized integity
|
||||
* profile). Consequently, stacked DM devices force a 2 stage integrity
|
||||
* profile validation: First pass during table load, final pass during
|
||||
* resume.
|
||||
*/
|
||||
static int dm_table_register_integrity(struct dm_table *t)
|
||||
{
|
||||
struct mapped_device *md = t->md;
|
||||
struct gendisk *template_disk = NULL;
|
||||
|
||||
/* If target handles integrity itself do not register it here. */
|
||||
if (t->integrity_added)
|
||||
return 0;
|
||||
|
||||
template_disk = dm_table_get_integrity_disk(t);
|
||||
if (!template_disk)
|
||||
return 0;
|
||||
|
||||
if (!integrity_profile_exists(dm_disk(md))) {
|
||||
t->integrity_supported = true;
|
||||
/*
|
||||
* Register integrity profile during table load; we can do
|
||||
* this because the final profile must match during resume.
|
||||
*/
|
||||
blk_integrity_register(dm_disk(md),
|
||||
blk_get_integrity(template_disk));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If DM device already has an initialized integrity
|
||||
* profile the new profile should not conflict.
|
||||
*/
|
||||
if (blk_integrity_compare(dm_disk(md), template_disk) < 0) {
|
||||
DMERR("%s: conflict with existing integrity profile: %s profile mismatch",
|
||||
dm_device_name(t->md),
|
||||
template_disk->disk_name);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Preserve existing integrity profile */
|
||||
t->integrity_supported = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
|
||||
|
||||
struct dm_crypto_profile {
|
||||
@ -1423,12 +1341,6 @@ int dm_table_complete(struct dm_table *t)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_table_register_integrity(t);
|
||||
if (r) {
|
||||
DMERR("could not register integrity profile.");
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_table_construct_crypto_profile(t);
|
||||
if (r) {
|
||||
DMERR("could not construct crypto profile.");
|
||||
@ -1493,14 +1405,6 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
|
||||
return &t->targets[(KEYS_PER_NODE * n) + k];
|
||||
}
|
||||
|
||||
static int device_not_poll_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return !test_bit(QUEUE_FLAG_POLL, &q->queue_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* type->iterate_devices() should be called when the sanity check needs to
|
||||
* iterate and check all underlying data devices. iterate_devices() will
|
||||
@ -1548,19 +1452,6 @@ static int count_device(struct dm_target *ti, struct dm_dev *dev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool dm_table_supports_poll(struct dm_table *t)
|
||||
{
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
ti->type->iterate_devices(ti, device_not_poll_capable, NULL))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether a table has no data devices attached using each
|
||||
* target's iterate_devices method.
|
||||
@ -1686,12 +1577,20 @@ int dm_calculate_queue_limits(struct dm_table *t,
|
||||
unsigned int zone_sectors = 0;
|
||||
bool zoned = false;
|
||||
|
||||
blk_set_stacking_limits(limits);
|
||||
dm_set_stacking_limits(limits);
|
||||
|
||||
t->integrity_supported = true;
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!dm_target_passes_integrity(ti->type))
|
||||
t->integrity_supported = false;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
blk_set_stacking_limits(&ti_limits);
|
||||
dm_set_stacking_limits(&ti_limits);
|
||||
|
||||
if (!ti->type->iterate_devices) {
|
||||
/* Set I/O hints portion of queue limits */
|
||||
@ -1706,12 +1605,12 @@ int dm_calculate_queue_limits(struct dm_table *t,
|
||||
ti->type->iterate_devices(ti, dm_set_device_limits,
|
||||
&ti_limits);
|
||||
|
||||
if (!zoned && ti_limits.zoned) {
|
||||
if (!zoned && (ti_limits.features & BLK_FEAT_ZONED)) {
|
||||
/*
|
||||
* After stacking all limits, validate all devices
|
||||
* in table support this zoned model and zone sectors.
|
||||
*/
|
||||
zoned = ti_limits.zoned;
|
||||
zoned = (ti_limits.features & BLK_FEAT_ZONED);
|
||||
zone_sectors = ti_limits.chunk_sectors;
|
||||
}
|
||||
|
||||
@ -1738,6 +1637,18 @@ combine_limits:
|
||||
dm_device_name(t->md),
|
||||
(unsigned long long) ti->begin,
|
||||
(unsigned long long) ti->len);
|
||||
|
||||
if (t->integrity_supported ||
|
||||
dm_target_has_integrity(ti->type)) {
|
||||
if (!queue_limits_stack_integrity(limits, &ti_limits)) {
|
||||
DMWARN("%s: adding target device (start sect %llu len %llu) "
|
||||
"disabled integrity support due to incompatibility",
|
||||
dm_device_name(t->md),
|
||||
(unsigned long long) ti->begin,
|
||||
(unsigned long long) ti->len);
|
||||
t->integrity_supported = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1747,12 +1658,12 @@ combine_limits:
|
||||
* zoned model on host-managed zoned block devices.
|
||||
* BUT...
|
||||
*/
|
||||
if (limits->zoned) {
|
||||
if (limits->features & BLK_FEAT_ZONED) {
|
||||
/*
|
||||
* ...IF the above limits stacking determined a zoned model
|
||||
* validate that all of the table's devices conform to it.
|
||||
*/
|
||||
zoned = limits->zoned;
|
||||
zoned = limits->features & BLK_FEAT_ZONED;
|
||||
zone_sectors = limits->chunk_sectors;
|
||||
}
|
||||
if (validate_hardware_zoned(t, zoned, zone_sectors))
|
||||
@ -1762,63 +1673,15 @@ combine_limits:
|
||||
}
|
||||
|
||||
/*
|
||||
* Verify that all devices have an integrity profile that matches the
|
||||
* DM device's registered integrity profile. If the profiles don't
|
||||
* match then unregister the DM device's integrity profile.
|
||||
* Check if a target requires flush support even if none of the underlying
|
||||
* devices need it (e.g. to persist target-specific metadata).
|
||||
*/
|
||||
static void dm_table_verify_integrity(struct dm_table *t)
|
||||
static bool dm_table_supports_flush(struct dm_table *t)
|
||||
{
|
||||
struct gendisk *template_disk = NULL;
|
||||
|
||||
if (t->integrity_added)
|
||||
return;
|
||||
|
||||
if (t->integrity_supported) {
|
||||
/*
|
||||
* Verify that the original integrity profile
|
||||
* matches all the devices in this table.
|
||||
*/
|
||||
template_disk = dm_table_get_integrity_disk(t);
|
||||
if (template_disk &&
|
||||
blk_integrity_compare(dm_disk(t->md), template_disk) >= 0)
|
||||
return;
|
||||
}
|
||||
|
||||
if (integrity_profile_exists(dm_disk(t->md))) {
|
||||
DMWARN("%s: unable to establish an integrity profile",
|
||||
dm_device_name(t->md));
|
||||
blk_integrity_unregister(dm_disk(t->md));
|
||||
}
|
||||
}
|
||||
|
||||
static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
unsigned long flush = (unsigned long) data;
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return (q->queue_flags & flush);
|
||||
}
|
||||
|
||||
static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
|
||||
{
|
||||
/*
|
||||
* Require at least one underlying device to support flushes.
|
||||
* t->devices includes internal dm devices such as mirror logs
|
||||
* so we need to use iterate_devices here, which targets
|
||||
* supporting flushes must provide.
|
||||
*/
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
if (!ti->num_flush_bios)
|
||||
continue;
|
||||
|
||||
if (ti->flush_supported)
|
||||
return true;
|
||||
|
||||
if (ti->type->iterate_devices &&
|
||||
ti->type->iterate_devices(ti, device_flush_capable, (void *) flush))
|
||||
if (ti->num_flush_bios && ti->flush_supported)
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1839,20 +1702,6 @@ static int device_dax_write_cache_enabled(struct dm_target *ti,
|
||||
return false;
|
||||
}
|
||||
|
||||
static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
return !bdev_nonrot(dev->bdev);
|
||||
}
|
||||
|
||||
static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(dev->bdev);
|
||||
|
||||
return !blk_queue_add_random(q);
|
||||
}
|
||||
|
||||
static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
@ -1877,12 +1726,6 @@ static bool dm_table_supports_write_zeroes(struct dm_table *t)
|
||||
return true;
|
||||
}
|
||||
|
||||
static int device_not_nowait_capable(struct dm_target *ti, struct dm_dev *dev,
|
||||
sector_t start, sector_t len, void *data)
|
||||
{
|
||||
return !bdev_nowait(dev->bdev);
|
||||
}
|
||||
|
||||
static bool dm_table_supports_nowait(struct dm_table *t)
|
||||
{
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
@ -1890,10 +1733,6 @@ static bool dm_table_supports_nowait(struct dm_table *t)
|
||||
|
||||
if (!dm_target_supports_nowait(ti->type))
|
||||
return false;
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
ti->type->iterate_devices(ti, device_not_nowait_capable, NULL))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -1950,29 +1789,25 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
|
||||
return true;
|
||||
}
|
||||
|
||||
static int device_requires_stable_pages(struct dm_target *ti,
|
||||
struct dm_dev *dev, sector_t start,
|
||||
sector_t len, void *data)
|
||||
{
|
||||
return bdev_stable_writes(dev->bdev);
|
||||
}
|
||||
|
||||
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
struct queue_limits *limits)
|
||||
{
|
||||
bool wc = false, fua = false;
|
||||
int r;
|
||||
|
||||
if (dm_table_supports_nowait(t))
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q);
|
||||
if (!dm_table_supports_nowait(t))
|
||||
limits->features &= ~BLK_FEAT_NOWAIT;
|
||||
|
||||
/*
|
||||
* The current polling impementation does not support request based
|
||||
* stacking.
|
||||
*/
|
||||
if (!__table_type_bio_based(t->type))
|
||||
limits->features &= ~BLK_FEAT_POLL;
|
||||
|
||||
if (!dm_table_supports_discards(t)) {
|
||||
limits->max_hw_discard_sectors = 0;
|
||||
limits->discard_granularity = 0;
|
||||
limits->discard_alignment = 0;
|
||||
limits->discard_misaligned = 0;
|
||||
}
|
||||
|
||||
if (!dm_table_supports_write_zeroes(t))
|
||||
@ -1981,58 +1816,22 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
if (!dm_table_supports_secure_erase(t))
|
||||
limits->max_secure_erase_sectors = 0;
|
||||
|
||||
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) {
|
||||
wc = true;
|
||||
if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_FUA)))
|
||||
fua = true;
|
||||
}
|
||||
blk_queue_write_cache(q, wc, fua);
|
||||
if (dm_table_supports_flush(t))
|
||||
limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
|
||||
|
||||
if (dm_table_supports_dax(t, device_not_dax_capable)) {
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
limits->features |= BLK_FEAT_DAX;
|
||||
if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
|
||||
set_dax_synchronous(t->md->dax_dev);
|
||||
} else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
|
||||
limits->features &= ~BLK_FEAT_DAX;
|
||||
|
||||
if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
|
||||
dax_write_cache(t->md->dax_dev, true);
|
||||
|
||||
/* Ensure that all underlying devices are non-rotational. */
|
||||
if (dm_table_any_dev_attr(t, device_is_rotational, NULL))
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
|
||||
dm_table_verify_integrity(t);
|
||||
|
||||
/*
|
||||
* Some devices don't use blk_integrity but still want stable pages
|
||||
* because they do their own checksumming.
|
||||
* If any underlying device requires stable pages, a table must require
|
||||
* them as well. Only targets that support iterate_devices are considered:
|
||||
* don't want error, zero, etc to require stable pages.
|
||||
*/
|
||||
if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL))
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
|
||||
/*
|
||||
* Determine whether or not this queue's I/O timings contribute
|
||||
* to the entropy pool, Only request-based targets use this.
|
||||
* Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not
|
||||
* have it set.
|
||||
*/
|
||||
if (blk_queue_add_random(q) &&
|
||||
dm_table_any_dev_attr(t, device_is_not_random, NULL))
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
|
||||
|
||||
/*
|
||||
* For a zoned target, setup the zones related queue attributes
|
||||
* and resources necessary for zone append emulation if necessary.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && limits->zoned) {
|
||||
/* For a zoned table, setup the zone related queue attributes. */
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
|
||||
(limits->features & BLK_FEAT_ZONED)) {
|
||||
r = dm_set_zones_restrictions(t, q, limits);
|
||||
if (r)
|
||||
return r;
|
||||
@ -2042,22 +1841,18 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
dm_update_crypto_profile(q, t);
|
||||
|
||||
/*
|
||||
* Check for request-based device is left to
|
||||
* dm_mq_init_request_queue()->blk_mq_init_allocated_queue().
|
||||
*
|
||||
* For bio-based device, only set QUEUE_FLAG_POLL when all
|
||||
* underlying devices supporting polling.
|
||||
* Now that the limits are set, check the zones mapped by the table
|
||||
* and setup the resources for zone append emulation if necessary.
|
||||
*/
|
||||
if (__table_type_bio_based(t->type)) {
|
||||
if (dm_table_supports_poll(t))
|
||||
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
|
||||
(limits->features & BLK_FEAT_ZONED)) {
|
||||
r = dm_revalidate_zones(t, q);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
dm_update_crypto_profile(q, t);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -13,8 +13,6 @@
|
||||
|
||||
#define DM_MSG_PREFIX "zone"
|
||||
|
||||
#define DM_ZONE_INVALID_WP_OFST UINT_MAX
|
||||
|
||||
/*
|
||||
* For internal zone reports bypassing the top BIO submission path.
|
||||
*/
|
||||
@ -145,35 +143,28 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Count conventional zones of a mapped zoned device. If the device
|
||||
* only has conventional zones, do not expose it as zoned.
|
||||
*/
|
||||
static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
unsigned int *nr_conv_zones = data;
|
||||
|
||||
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
|
||||
(*nr_conv_zones)++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revalidate the zones of a mapped device to initialize resource necessary
|
||||
* for zone append emulation. Note that we cannot simply use the block layer
|
||||
* blk_revalidate_disk_zones() function here as the mapped device is suspended
|
||||
* (this is called from __bind() context).
|
||||
*/
|
||||
static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t)
|
||||
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
|
||||
{
|
||||
struct mapped_device *md = t->md;
|
||||
struct gendisk *disk = md->disk;
|
||||
int ret;
|
||||
|
||||
if (!get_capacity(disk))
|
||||
return 0;
|
||||
|
||||
/* Revalidate only if something changed. */
|
||||
if (!disk->nr_zones || disk->nr_zones != md->nr_zones)
|
||||
if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
|
||||
DMINFO("%s using %s zone append",
|
||||
disk->disk_name,
|
||||
queue_emulates_zone_append(q) ? "emulated" : "native");
|
||||
md->nr_zones = 0;
|
||||
}
|
||||
|
||||
if (md->nr_zones)
|
||||
return 0;
|
||||
@ -220,13 +211,129 @@ static bool dm_table_supports_zone_append(struct dm_table *t)
|
||||
return true;
|
||||
}
|
||||
|
||||
struct dm_device_zone_count {
|
||||
sector_t start;
|
||||
sector_t len;
|
||||
unsigned int total_nr_seq_zones;
|
||||
unsigned int target_nr_seq_zones;
|
||||
};
|
||||
|
||||
/*
|
||||
* Count the total number of and the number of mapped sequential zones of a
|
||||
* target zoned device.
|
||||
*/
|
||||
static int dm_device_count_zones_cb(struct blk_zone *zone,
|
||||
unsigned int idx, void *data)
|
||||
{
|
||||
struct dm_device_zone_count *zc = data;
|
||||
|
||||
if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
|
||||
zc->total_nr_seq_zones++;
|
||||
if (zone->start >= zc->start &&
|
||||
zone->start < zc->start + zc->len)
|
||||
zc->target_nr_seq_zones++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dm_device_count_zones(struct dm_dev *dev,
|
||||
struct dm_device_zone_count *zc)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
|
||||
dm_device_count_zones_cb, zc);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (!ret)
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct dm_zone_resource_limits {
|
||||
unsigned int mapped_nr_seq_zones;
|
||||
struct queue_limits *lim;
|
||||
bool reliable_limits;
|
||||
};
|
||||
|
||||
static int device_get_zone_resource_limits(struct dm_target *ti,
|
||||
struct dm_dev *dev, sector_t start,
|
||||
sector_t len, void *data)
|
||||
{
|
||||
struct dm_zone_resource_limits *zlim = data;
|
||||
struct gendisk *disk = dev->bdev->bd_disk;
|
||||
unsigned int max_open_zones, max_active_zones;
|
||||
int ret;
|
||||
struct dm_device_zone_count zc = {
|
||||
.start = start,
|
||||
.len = len,
|
||||
};
|
||||
|
||||
/*
|
||||
* If the target is not the whole device, the device zone resources may
|
||||
* be shared between different targets. Check this by counting the
|
||||
* number of mapped sequential zones: if this number is smaller than the
|
||||
* total number of sequential zones of the target device, then resource
|
||||
* sharing may happen and the zone limits will not be reliable.
|
||||
*/
|
||||
ret = dm_device_count_zones(dev, &zc);
|
||||
if (ret) {
|
||||
DMERR("Count %s zones failed %d", disk->disk_name, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the target does not map any sequential zones, then we do not need
|
||||
* any zone resource limits.
|
||||
*/
|
||||
if (!zc.target_nr_seq_zones)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If the target does not map all sequential zones, the limits
|
||||
* will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
|
||||
*/
|
||||
if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
|
||||
zlim->reliable_limits = false;
|
||||
ti->zone_reset_all_supported = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the target maps less sequential zones than the limit values, then
|
||||
* we do not have limits for this target.
|
||||
*/
|
||||
max_active_zones = disk->queue->limits.max_active_zones;
|
||||
if (max_active_zones >= zc.target_nr_seq_zones)
|
||||
max_active_zones = 0;
|
||||
zlim->lim->max_active_zones =
|
||||
min_not_zero(max_active_zones, zlim->lim->max_active_zones);
|
||||
|
||||
max_open_zones = disk->queue->limits.max_open_zones;
|
||||
if (max_open_zones >= zc.target_nr_seq_zones)
|
||||
max_open_zones = 0;
|
||||
zlim->lim->max_open_zones =
|
||||
min_not_zero(max_open_zones, zlim->lim->max_open_zones);
|
||||
|
||||
/*
|
||||
* Also count the total number of sequential zones for the mapped
|
||||
* device so that when we are done inspecting all its targets, we are
|
||||
* able to check if the mapped device actually has any sequential zones.
|
||||
*/
|
||||
zlim->mapped_nr_seq_zones += zc.target_nr_seq_zones;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
struct queue_limits *lim)
|
||||
{
|
||||
struct mapped_device *md = t->md;
|
||||
struct gendisk *disk = md->disk;
|
||||
unsigned int nr_conv_zones = 0;
|
||||
int ret;
|
||||
struct dm_zone_resource_limits zlim = {
|
||||
.reliable_limits = true,
|
||||
.lim = lim,
|
||||
};
|
||||
|
||||
/*
|
||||
* Check if zone append is natively supported, and if not, set the
|
||||
@ -240,46 +347,63 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
lim->max_zone_append_sectors = 0;
|
||||
}
|
||||
|
||||
if (!get_capacity(md->disk))
|
||||
return 0;
|
||||
/*
|
||||
* Determine the max open and max active zone limits for the mapped
|
||||
* device by inspecting the zone resource limits and the zones mapped
|
||||
* by each target.
|
||||
*/
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
/*
|
||||
* Count conventional zones to check that the mapped device will indeed
|
||||
* have sequential write required zones.
|
||||
* Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
|
||||
* device_get_zone_resource_limits() may adjust this if one of
|
||||
* the device used by the target does not have all its
|
||||
* sequential write required zones mapped.
|
||||
*/
|
||||
md->zone_revalidate_map = t;
|
||||
ret = dm_blk_report_zones(disk, 0, UINT_MAX,
|
||||
dm_check_zoned_cb, &nr_conv_zones);
|
||||
md->zone_revalidate_map = NULL;
|
||||
if (ret < 0) {
|
||||
DMERR("Check zoned failed %d", ret);
|
||||
return ret;
|
||||
ti->zone_reset_all_supported = true;
|
||||
|
||||
if (!ti->type->iterate_devices ||
|
||||
ti->type->iterate_devices(ti,
|
||||
device_get_zone_resource_limits, &zlim)) {
|
||||
DMERR("Could not determine %s zone resource limits",
|
||||
disk->disk_name);
|
||||
return -ENODEV;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we only have conventional zones, expose the mapped device as
|
||||
* a regular device.
|
||||
* If we only have conventional zones mapped, expose the mapped device
|
||||
* as a regular device.
|
||||
*/
|
||||
if (nr_conv_zones >= ret) {
|
||||
if (!zlim.mapped_nr_seq_zones) {
|
||||
lim->max_open_zones = 0;
|
||||
lim->max_active_zones = 0;
|
||||
lim->zoned = false;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
lim->zone_write_granularity = 0;
|
||||
lim->chunk_sectors = 0;
|
||||
lim->features &= ~BLK_FEAT_ZONED;
|
||||
clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
|
||||
md->nr_zones = 0;
|
||||
disk->nr_zones = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!md->disk->nr_zones) {
|
||||
DMINFO("%s using %s zone append",
|
||||
md->disk->disk_name,
|
||||
queue_emulates_zone_append(q) ? "emulated" : "native");
|
||||
}
|
||||
/*
|
||||
* Warn once (when the capacity is not yet set) if the mapped device is
|
||||
* partially using zone resources of the target devices as that leads to
|
||||
* unreliable limits, i.e. if another mapped device uses the same
|
||||
* underlying devices, we cannot enforce zone limits to guarantee that
|
||||
* writing will not lead to errors. Note that we really should return
|
||||
* an error for such case but there is no easy way to find out if
|
||||
* another mapped device uses the same underlying zoned devices.
|
||||
*/
|
||||
if (!get_capacity(disk) && !zlim.reliable_limits)
|
||||
DMWARN("%s zone resource limits may be unreliable",
|
||||
disk->disk_name);
|
||||
|
||||
ret = dm_revalidate_zones(md, t);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (!static_key_enabled(&zoned_enabled.key))
|
||||
if (lim->features & BLK_FEAT_ZONED &&
|
||||
!static_key_enabled(&zoned_enabled.key))
|
||||
static_branch_enable(&zoned_enabled);
|
||||
return 0;
|
||||
}
|
||||
@ -306,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
|
||||
void *data)
|
||||
{
|
||||
/*
|
||||
* For an all-zones reset, ignore conventional, empty, read-only
|
||||
* and offline zones.
|
||||
*/
|
||||
switch (zone->cond) {
|
||||
case BLK_ZONE_COND_NOT_WP:
|
||||
case BLK_ZONE_COND_EMPTY:
|
||||
case BLK_ZONE_COND_READONLY:
|
||||
case BLK_ZONE_COND_OFFLINE:
|
||||
return 0;
|
||||
default:
|
||||
set_bit(idx, (unsigned long *)data);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
|
||||
sector_t sector, unsigned int nr_zones,
|
||||
unsigned long *need_reset)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
|
||||
dm_zone_need_reset_cb, need_reset);
|
||||
if (ret != nr_zones) {
|
||||
DMERR("Get %s zone reset bitmap failed\n",
|
||||
md->disk->disk_name);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1009,7 +1009,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
	limits->max_sectors = chunk_sectors;

	/* We are exposing a drive-managed zoned block device */
	limits->zoned = false;
	limits->features &= ~BLK_FEAT_ZONED;
}

/*
drivers/md/dm.c (164 changed lines)
@@ -1188,7 +1188,7 @@ static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
		return len;
	return min_t(sector_t, len,
		min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
		    blk_chunk_sectors_left(target_offset, max_granularity)));
		    blk_boundary_sectors_left(target_offset, max_granularity)));
}

static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
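The __max_io_len() hunk above picks up the blk_chunk_sectors_left() to blk_boundary_sectors_left() rename. For a power-of-two boundary the remaining room is a mask-and-subtract; here is a standalone sketch of that arithmetic (illustrative only, not the kernel helper):

```c
/* How many sectors remain before the next power-of-two boundary. */
#include <stdio.h>

typedef unsigned long long sector_t;

static sector_t boundary_sectors_left(sector_t offset, unsigned int boundary)
{
	/* boundary must be a power of two for this mask trick */
	return boundary - (offset & (boundary - 1));
}

int main(void)
{
	unsigned int zone_sectors = 1u << 19;	/* 256 MiB zones of 512 B sectors */
	sector_t offset = (1ull << 19) + 4096;	/* 4096 sectors into zone 1 */

	printf("sectors left in this zone: %llu\n",
	       boundary_sectors_left(offset, zone_sectors));
	return 0;
}
```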
@ -1598,21 +1598,20 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
|
||||
|
||||
static bool is_abnormal_io(struct bio *bio)
|
||||
{
|
||||
enum req_op op = bio_op(bio);
|
||||
|
||||
if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
|
||||
switch (op) {
|
||||
switch (bio_op(bio)) {
|
||||
case REQ_OP_READ:
|
||||
case REQ_OP_WRITE:
|
||||
case REQ_OP_FLUSH:
|
||||
return false;
|
||||
case REQ_OP_DISCARD:
|
||||
case REQ_OP_SECURE_ERASE:
|
||||
case REQ_OP_WRITE_ZEROES:
|
||||
case REQ_OP_ZONE_RESET_ALL:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static blk_status_t __process_abnormal_io(struct clone_info *ci,
|
||||
struct dm_target *ti)
|
||||
@ -1776,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
|
||||
{
|
||||
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
|
||||
}
|
||||
|
||||
static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
|
||||
struct dm_target *ti)
|
||||
{
|
||||
struct bio_list blist = BIO_EMPTY_LIST;
|
||||
struct mapped_device *md = ci->io->md;
|
||||
unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
|
||||
unsigned long *need_reset;
|
||||
unsigned int i, nr_zones, nr_reset;
|
||||
unsigned int num_bios = 0;
|
||||
blk_status_t sts = BLK_STS_OK;
|
||||
sector_t sector = ti->begin;
|
||||
struct bio *clone;
|
||||
int ret;
|
||||
|
||||
nr_zones = ti->len >> ilog2(zone_sectors);
|
||||
need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
|
||||
if (!need_reset)
|
||||
return BLK_STS_RESOURCE;
|
||||
|
||||
ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
|
||||
nr_zones, need_reset);
|
||||
if (ret) {
|
||||
sts = BLK_STS_IOERR;
|
||||
goto free_bitmap;
|
||||
}
|
||||
|
||||
/* If we have no zone to reset, we are done. */
|
||||
nr_reset = bitmap_weight(need_reset, nr_zones);
|
||||
if (!nr_reset)
|
||||
goto free_bitmap;
|
||||
|
||||
atomic_add(nr_zones, &ci->io->io_count);
|
||||
|
||||
for (i = 0; i < nr_zones; i++) {
|
||||
|
||||
if (!test_bit(i, need_reset)) {
|
||||
sector += zone_sectors;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (bio_list_empty(&blist)) {
|
||||
/* This may take a while, so be nice to others */
|
||||
if (num_bios)
|
||||
cond_resched();
|
||||
|
||||
/*
|
||||
* We may need to reset thousands of zones, so let's
|
||||
* not go crazy with the clone allocation.
|
||||
*/
|
||||
alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
|
||||
NULL, GFP_NOIO);
|
||||
}
|
||||
|
||||
/* Get a clone and change it to a regular reset operation. */
|
||||
clone = bio_list_pop(&blist);
|
||||
clone->bi_opf &= ~REQ_OP_MASK;
|
||||
clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
|
||||
clone->bi_iter.bi_sector = sector;
|
||||
clone->bi_iter.bi_size = 0;
|
||||
__map_bio(clone);
|
||||
|
||||
sector += zone_sectors;
|
||||
num_bios++;
|
||||
nr_reset--;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(!bio_list_empty(&blist));
|
||||
atomic_sub(nr_zones - num_bios, &ci->io->io_count);
|
||||
ci->sector_count = 0;
|
||||
|
||||
free_bitmap:
|
||||
bitmap_free(need_reset);
|
||||
|
||||
return sts;
|
||||
}
|
||||
|
||||
static void __send_zone_reset_all_native(struct clone_info *ci,
|
||||
struct dm_target *ti)
|
||||
{
|
||||
unsigned int bios;
|
||||
|
||||
atomic_add(1, &ci->io->io_count);
|
||||
bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
|
||||
atomic_sub(1 - bios, &ci->io->io_count);
|
||||
|
||||
ci->sector_count = 0;
|
||||
}
|
||||
|
||||
static blk_status_t __send_zone_reset_all(struct clone_info *ci)
|
||||
{
|
||||
struct dm_table *t = ci->map;
|
||||
blk_status_t sts = BLK_STS_OK;
|
||||
|
||||
for (unsigned int i = 0; i < t->num_targets; i++) {
|
||||
struct dm_target *ti = dm_table_get_target(t, i);
|
||||
|
||||
if (ti->zone_reset_all_supported) {
|
||||
__send_zone_reset_all_native(ci, ti);
|
||||
continue;
|
||||
}
|
||||
|
||||
sts = __send_zone_reset_all_emulated(ci, ti);
|
||||
if (sts != BLK_STS_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
/* Release the reference that alloc_io() took for submission. */
|
||||
atomic_sub(1, &ci->io->io_count);
|
||||
|
||||
return sts;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
|
||||
struct bio *bio)
|
||||
@ -1786,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static blk_status_t __send_zone_reset_all(struct clone_info *ci)
|
||||
{
|
||||
return BLK_STS_NOTSUPP;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -1799,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
|
||||
blk_status_t error = BLK_STS_OK;
|
||||
bool is_abnormal, need_split;
|
||||
|
||||
need_split = is_abnormal = is_abnormal_io(bio);
|
||||
if (static_branch_unlikely(&zoned_enabled))
|
||||
need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
|
||||
is_abnormal = is_abnormal_io(bio);
|
||||
if (static_branch_unlikely(&zoned_enabled)) {
|
||||
/* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
|
||||
need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
|
||||
(is_abnormal || dm_zone_bio_needs_split(md, bio));
|
||||
} else {
|
||||
need_split = is_abnormal;
|
||||
}
|
||||
|
||||
if (unlikely(need_split)) {
|
||||
/*
|
||||
@ -1842,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (static_branch_unlikely(&zoned_enabled) &&
|
||||
(bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
|
||||
error = __send_zone_reset_all(&ci);
|
||||
goto out;
|
||||
}
|
||||
|
||||
error = __split_and_process_bio(&ci);
|
||||
if (error || !ci.sector_count)
|
||||
goto out;
|
||||
@ -2386,22 +2513,15 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
|
||||
struct table_device *td;
|
||||
int r;
|
||||
|
||||
switch (type) {
|
||||
case DM_TYPE_REQUEST_BASED:
|
||||
WARN_ON_ONCE(type == DM_TYPE_NONE);
|
||||
|
||||
if (type == DM_TYPE_REQUEST_BASED) {
|
||||
md->disk->fops = &dm_rq_blk_dops;
|
||||
r = dm_mq_init_request_queue(md, t);
|
||||
if (r) {
|
||||
DMERR("Cannot initialize queue for request-based dm mapped device");
|
||||
return r;
|
||||
}
|
||||
break;
|
||||
case DM_TYPE_BIO_BASED:
|
||||
case DM_TYPE_DAX_BIO_BASED:
|
||||
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue);
|
||||
break;
|
||||
case DM_TYPE_NONE:
|
||||
WARN_ON_ONCE(true);
|
||||
break;
|
||||
}
|
||||
|
||||
r = dm_calculate_queue_limits(t, &limits);
|
||||
|
@ -103,12 +103,16 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
|
||||
*/
|
||||
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
|
||||
struct queue_limits *lim);
|
||||
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q);
|
||||
void dm_zone_endio(struct dm_io *io, struct bio *clone);
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
unsigned int nr_zones, report_zones_cb cb, void *data);
|
||||
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
|
||||
int dm_zone_map_bio(struct dm_target_io *io);
|
||||
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
|
||||
sector_t sector, unsigned int nr_zones,
|
||||
unsigned long *need_reset);
|
||||
#else
|
||||
#define dm_blk_report_zones NULL
|
||||
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
|
||||
|
@@ -227,6 +227,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
	struct block_device *bdev;
	struct mddev *mddev = bitmap->mddev;
	struct bitmap_storage *store = &bitmap->storage;
	unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
		PAGE_SHIFT;
	loff_t sboff, offset = mddev->bitmap_info.offset;
	sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
	unsigned int size = PAGE_SIZE;
@@ -269,11 +271,9 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
		if (size == 0)
			/* bitmap runs in to data */
			return -EINVAL;
	} else {
		/* DATA METADATA BITMAP - no problems */
	}

	md_super_write(mddev, rdev, sboff + ps, (int) size, page);
	md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
	return 0;
}

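The __write_sb_page() change above caps each bitmap write at the bytes still available in the bitmap's file pages, via min(size, bitmap_limit). A tiny userspace sketch of that clamp with made-up numbers (the request is assumed to have been rounded up to an optimal I/O size larger than what remains):

```c
/* Never write past the pages that back the bitmap. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

static unsigned int min_uint(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int file_pages = 3;		/* pages backing the bitmap */
	unsigned int pg_index = 2;		/* writing from the last page */
	unsigned int size = 2 * PAGE_SIZE;	/* rounded up to io_opt (assumed) */

	unsigned int bitmap_limit = (file_pages - pg_index) << PAGE_SHIFT;

	printf("requested %u bytes, %u left in bitmap pages, writing %u\n",
	       size, bitmap_limit, min_uint(size, bitmap_limit));
	return 0;
}
```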
@@ -1570,7 +1570,7 @@ out:
	return err;
}

static struct md_cluster_operations cluster_ops = {
static const struct md_cluster_operations cluster_ops = {
	.join = join,
	.leave = leave,
	.slot_number = slot_number,
drivers/md/md.c (618 changed lines)
@@ -69,13 +69,23 @@
#include "md-bitmap.h"
#include "md-cluster.h"

static const char *action_name[NR_SYNC_ACTIONS] = {
	[ACTION_RESYNC] = "resync",
	[ACTION_RECOVER] = "recover",
	[ACTION_CHECK] = "check",
	[ACTION_REPAIR] = "repair",
	[ACTION_RESHAPE] = "reshape",
	[ACTION_FROZEN] = "frozen",
	[ACTION_IDLE] = "idle",
};

/* pers_list is a list of registered personalities protected by pers_lock. */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);

static const struct kobj_type md_ktype;

struct md_cluster_operations *md_cluster_ops;
const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;

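With last_sync_action now an enum and action_name[] mapping it to the user-visible strings, checks like the dm-raid one earlier in this diff become plain enum comparisons. A self-contained userspace mirror of that mapping (sketch only; the enum values here are local to the example):

```c
/* Map sync actions to names and back, then compare by enum, not string. */
#include <stdio.h>
#include <string.h>

enum sync_action {
	ACTION_RESYNC,
	ACTION_RECOVER,
	ACTION_CHECK,
	ACTION_REPAIR,
	ACTION_RESHAPE,
	ACTION_FROZEN,
	ACTION_IDLE,
	NR_SYNC_ACTIONS,
};

static const char *action_name[NR_SYNC_ACTIONS] = {
	[ACTION_RESYNC]  = "resync",
	[ACTION_RECOVER] = "recover",
	[ACTION_CHECK]   = "check",
	[ACTION_REPAIR]  = "repair",
	[ACTION_RESHAPE] = "reshape",
	[ACTION_FROZEN]  = "frozen",
	[ACTION_IDLE]    = "idle",
};

/* Look an action up by its user-visible name; NR_SYNC_ACTIONS means no match. */
static enum sync_action action_by_name(const char *page)
{
	for (int a = 0; a < NR_SYNC_ACTIONS; a++)
		if (!strcmp(page, action_name[a]))
			return (enum sync_action)a;
	return NR_SYNC_ACTIONS;
}

int main(void)
{
	enum sync_action last = action_by_name("check");

	if (last < NR_SYNC_ACTIONS)
		/* Enum compare replaces strcasecmp(last_sync_action, "check"). */
		printf("%s: is check? %d\n", action_name[last], last == ACTION_CHECK);
	return 0;
}
```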
@ -479,7 +489,6 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
|
||||
*/
|
||||
WRITE_ONCE(mddev->suspended, mddev->suspended + 1);
|
||||
|
||||
del_timer_sync(&mddev->safemode_timer);
|
||||
/* restrict memory reclaim I/O during raid array is suspend */
|
||||
mddev->noio_flag = memalloc_noio_save();
|
||||
|
||||
@ -550,14 +559,10 @@ static void md_end_flush(struct bio *bio)
|
||||
|
||||
rdev_dec_pending(rdev, mddev);
|
||||
|
||||
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||
/* The pair is percpu_ref_get() from md_flush_request() */
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
|
||||
if (atomic_dec_and_test(&mddev->flush_pending))
|
||||
/* The pre-request flush has finished */
|
||||
queue_work(md_wq, &mddev->flush_work);
|
||||
}
|
||||
}
|
||||
|
||||
static void md_submit_flush_data(struct work_struct *ws);
|
||||
|
||||
@ -587,13 +592,9 @@ static void submit_flushes(struct work_struct *ws)
|
||||
rcu_read_lock();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
if (atomic_dec_and_test(&mddev->flush_pending)) {
|
||||
/* The pair is percpu_ref_get() from md_flush_request() */
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
|
||||
if (atomic_dec_and_test(&mddev->flush_pending))
|
||||
queue_work(md_wq, &mddev->flush_work);
|
||||
}
|
||||
}
|
||||
|
||||
static void md_submit_flush_data(struct work_struct *ws)
|
||||
{
|
||||
@ -617,8 +618,20 @@ static void md_submit_flush_data(struct work_struct *ws)
|
||||
bio_endio(bio);
|
||||
} else {
|
||||
bio->bi_opf &= ~REQ_PREFLUSH;
|
||||
md_handle_request(mddev, bio);
|
||||
|
||||
/*
|
||||
* make_requst() will never return error here, it only
|
||||
* returns error in raid5_make_request() by dm-raid.
|
||||
* Since dm always splits data and flush operation into
|
||||
* two separate io, io size of flush submitted by dm
|
||||
* always is 0, make_request() will not be called here.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
|
||||
bio_io_error(bio);
|
||||
}
|
||||
|
||||
/* The pair is percpu_ref_get() from md_flush_request() */
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -654,25 +667,23 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
|
||||
WARN_ON(percpu_ref_is_zero(&mddev->active_io));
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
mddev->flush_bio = bio;
|
||||
bio = NULL;
|
||||
}
|
||||
spin_unlock_irq(&mddev->lock);
|
||||
|
||||
if (!bio) {
|
||||
INIT_WORK(&mddev->flush_work, submit_flushes);
|
||||
queue_work(md_wq, &mddev->flush_work);
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
|
||||
/* flush was performed for some other bio while we waited. */
|
||||
if (bio->bi_iter.bi_size == 0)
|
||||
/* an empty barrier - all done */
|
||||
spin_unlock_irq(&mddev->lock);
|
||||
if (bio->bi_iter.bi_size == 0) {
|
||||
/* pure flush without data - all done */
|
||||
bio_endio(bio);
|
||||
else {
|
||||
return true;
|
||||
}
|
||||
|
||||
bio->bi_opf &= ~REQ_PREFLUSH;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
EXPORT_SYMBOL(md_flush_request);
|
||||
|
||||
static inline struct mddev *mddev_get(struct mddev *mddev)
|
||||
@ -742,7 +753,6 @@ int mddev_init(struct mddev *mddev)
|
||||
|
||||
mutex_init(&mddev->open_mutex);
|
||||
mutex_init(&mddev->reconfig_mutex);
|
||||
mutex_init(&mddev->sync_mutex);
|
||||
mutex_init(&mddev->suspend_mutex);
|
||||
mutex_init(&mddev->bitmap_info.mutex);
|
||||
INIT_LIST_HEAD(&mddev->disks);
|
||||
@ -758,7 +768,7 @@ int mddev_init(struct mddev *mddev)
|
||||
init_waitqueue_head(&mddev->recovery_wait);
|
||||
mddev->reshape_position = MaxSector;
|
||||
mddev->reshape_backwards = 0;
|
||||
mddev->last_sync_action = "none";
|
||||
mddev->last_sync_action = ACTION_IDLE;
|
||||
mddev->resync_min = 0;
|
||||
mddev->resync_max = MaxSector;
|
||||
mddev->level = LEVEL_NONE;
|
||||
@ -2410,36 +2420,10 @@ static LIST_HEAD(pending_raid_disks);
|
||||
*/
|
||||
int md_integrity_register(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev, *reference = NULL;
|
||||
|
||||
if (list_empty(&mddev->disks))
|
||||
return 0; /* nothing to do */
|
||||
if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk))
|
||||
return 0; /* shouldn't register, or already is */
|
||||
rdev_for_each(rdev, mddev) {
|
||||
/* skip spares and non-functional disks */
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
if (rdev->raid_disk < 0)
|
||||
continue;
|
||||
if (!reference) {
|
||||
/* Use the first rdev as the reference */
|
||||
reference = rdev;
|
||||
continue;
|
||||
}
|
||||
/* does this rdev's profile match the reference profile? */
|
||||
if (blk_integrity_compare(reference->bdev->bd_disk,
|
||||
rdev->bdev->bd_disk) < 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!reference || !bdev_get_integrity(reference->bdev))
|
||||
return 0;
|
||||
/*
|
||||
* All component devices are integrity capable and have matching
|
||||
* profiles, register the common profile for the md device.
|
||||
*/
|
||||
blk_integrity_register(mddev->gendisk,
|
||||
bdev_get_integrity(reference->bdev));
|
||||
if (mddev_is_dm(mddev) || !blk_get_integrity(mddev->gendisk))
|
||||
return 0; /* shouldn't register */
|
||||
|
||||
pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
|
||||
if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
|
||||
@ -2459,32 +2443,6 @@ int md_integrity_register(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_register);
|
||||
|
||||
/*
|
||||
* Attempt to add an rdev, but only if it is consistent with the current
|
||||
* integrity profile
|
||||
*/
|
||||
int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
{
|
||||
struct blk_integrity *bi_mddev;
|
||||
|
||||
if (mddev_is_dm(mddev))
|
||||
return 0;
|
||||
|
||||
bi_mddev = blk_get_integrity(mddev->gendisk);
|
||||
|
||||
if (!bi_mddev) /* nothing to do */
|
||||
return 0;
|
||||
|
||||
if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
|
||||
pr_err("%s: incompatible integrity profile for %pg\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(md_integrity_add_rdev);
|
||||
|
||||
static bool rdev_read_only(struct md_rdev *rdev)
|
||||
{
|
||||
return bdev_read_only(rdev->bdev) ||
|
||||
@ -4867,30 +4825,81 @@ out_unlock:
|
||||
static struct md_sysfs_entry md_metadata =
|
||||
__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
|
||||
|
||||
enum sync_action md_sync_action(struct mddev *mddev)
|
||||
{
|
||||
unsigned long recovery = mddev->recovery;
|
||||
|
||||
/*
|
||||
* frozen has the highest priority, means running sync_thread will be
|
||||
* stopped immediately, and no new sync_thread can start.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
|
||||
return ACTION_FROZEN;
|
||||
|
||||
/*
|
||||
* read-only array can't register sync_thread, and it can only
|
||||
* add/remove spares.
|
||||
*/
|
||||
if (!md_is_rdwr(mddev))
|
||||
return ACTION_IDLE;
|
||||
|
||||
/*
|
||||
* idle means no sync_thread is running, and no new sync_thread is
|
||||
* requested.
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_NEEDED, &recovery))
|
||||
return ACTION_IDLE;
|
||||
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
|
||||
mddev->reshape_position != MaxSector)
|
||||
return ACTION_RESHAPE;
|
||||
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
return ACTION_RECOVER;
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
|
||||
/*
|
||||
* MD_RECOVERY_CHECK must be paired with
|
||||
* MD_RECOVERY_REQUESTED.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
||||
return ACTION_CHECK;
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &recovery))
|
||||
return ACTION_REPAIR;
|
||||
return ACTION_RESYNC;
|
||||
}
|
||||
|
||||
/*
|
||||
* MD_RECOVERY_NEEDED or MD_RECOVERY_RUNNING is set, however, no
|
||||
* sync_action is specified.
|
||||
*/
|
||||
return ACTION_IDLE;
|
||||
}
|
||||
|
||||
enum sync_action md_sync_action_by_name(const char *page)
|
||||
{
|
||||
enum sync_action action;
|
||||
|
||||
for (action = 0; action < NR_SYNC_ACTIONS; ++action) {
|
||||
if (cmd_match(page, action_name[action]))
|
||||
return action;
|
||||
}
|
||||
|
||||
return NR_SYNC_ACTIONS;
|
||||
}
|
||||
|
||||
const char *md_sync_action_name(enum sync_action action)
|
||||
{
|
||||
return action_name[action];
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
action_show(struct mddev *mddev, char *page)
|
||||
{
|
||||
char *type = "idle";
|
||||
unsigned long recovery = mddev->recovery;
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
|
||||
type = "frozen";
|
||||
else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
|
||||
(md_is_rdwr(mddev) && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
|
||||
type = "reshape";
|
||||
else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
|
||||
type = "resync";
|
||||
else if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
||||
type = "check";
|
||||
else
|
||||
type = "repair";
|
||||
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
type = "recover";
|
||||
else if (mddev->reshape_position != MaxSector)
|
||||
type = "reshape";
|
||||
}
|
||||
return sprintf(page, "%s\n", type);
|
||||
enum sync_action action = md_sync_action(mddev);
|
||||
|
||||
return sprintf(page, "%s\n", md_sync_action_name(action));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -4899,15 +4908,10 @@ action_show(struct mddev *mddev, char *page)
|
||||
* @locked: if set, reconfig_mutex will still be held after this function
|
||||
* return; if not set, reconfig_mutex will be released after this
|
||||
* function return.
|
||||
* @check_seq: if set, only wait for curent running sync_thread to stop, noted
|
||||
* that new sync_thread can still start.
|
||||
*/
|
||||
static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
|
||||
static void stop_sync_thread(struct mddev *mddev, bool locked)
|
||||
{
|
||||
int sync_seq;
|
||||
|
||||
if (check_seq)
|
||||
sync_seq = atomic_read(&mddev->sync_seq);
|
||||
int sync_seq = atomic_read(&mddev->sync_seq);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
if (!locked)
|
||||
@ -4928,7 +4932,8 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
|
||||
|
||||
wait_event(resync_wait,
|
||||
!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
(check_seq && sync_seq != atomic_read(&mddev->sync_seq)));
|
||||
(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery) &&
|
||||
sync_seq != atomic_read(&mddev->sync_seq)));
|
||||
|
||||
if (locked)
|
||||
mddev_lock_nointr(mddev);
|
||||
@ -4939,7 +4944,7 @@ void md_idle_sync_thread(struct mddev *mddev)
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, true);
|
||||
stop_sync_thread(mddev, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_idle_sync_thread);
|
||||
|
||||
@ -4948,7 +4953,7 @@ void md_frozen_sync_thread(struct mddev *mddev)
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, false);
|
||||
stop_sync_thread(mddev, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
|
||||
|
||||
@ -4963,100 +4968,127 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
|
||||
|
||||
static void idle_sync_thread(struct mddev *mddev)
|
||||
static int mddev_start_reshape(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
int ret;
|
||||
|
||||
if (mddev->pers->start_reshape == NULL)
|
||||
return -EINVAL;
|
||||
|
||||
if (mddev->reshape_position == MaxSector ||
|
||||
mddev->pers->check_reshape == NULL ||
|
||||
mddev->pers->check_reshape(mddev)) {
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
ret = mddev->pers->start_reshape(mddev);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
/*
|
||||
* If reshape is still in progress, and md_check_recovery() can
|
||||
* continue to reshape, don't restart reshape because data can
|
||||
* be corrupted for raid456.
|
||||
*/
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (mddev_lock(mddev)) {
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false, true);
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
}
|
||||
|
||||
static void frozen_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
mutex_lock(&mddev->sync_mutex);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (mddev_lock(mddev)) {
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false, false);
|
||||
mutex_unlock(&mddev->sync_mutex);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
action_store(struct mddev *mddev, const char *page, size_t len)
{
int ret;
enum sync_action action;

if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;

retry:
if (work_busy(&mddev->sync_work))
flush_work(&mddev->sync_work);

if (cmd_match(page, "idle"))
idle_sync_thread(mddev);
else if (cmd_match(page, "frozen"))
frozen_sync_thread(mddev);
else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
else if (cmd_match(page, "resync"))
ret = mddev_lock(mddev);
if (ret)
return ret;

if (work_busy(&mddev->sync_work)) {
mddev_unlock(mddev);
goto retry;
}

action = md_sync_action_by_name(page);

/* TODO: mdadm rely on "idle" to start sync_thread. */
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
switch (action) {
case ACTION_FROZEN:
md_frozen_sync_thread(mddev);
ret = len;
goto out;
case ACTION_IDLE:
md_idle_sync_thread(mddev);
break;
case ACTION_RESHAPE:
case ACTION_RECOVER:
case ACTION_CHECK:
case ACTION_REPAIR:
case ACTION_RESYNC:
ret = -EBUSY;
goto out;
default:
ret = -EINVAL;
goto out;
}
} else {
switch (action) {
case ACTION_FROZEN:
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
ret = len;
goto out;
case ACTION_RESHAPE:
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
else if (cmd_match(page, "recover")) {
ret = mddev_start_reshape(mddev);
if (ret)
goto out;
break;
case ACTION_RECOVER:
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (cmd_match(page, "reshape")) {
int err;
if (mddev->pers->start_reshape == NULL)
return -EINVAL;
err = mddev_lock(mddev);
if (!err) {
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
err = -EBUSY;
} else if (mddev->reshape_position == MaxSector ||
mddev->pers->check_reshape == NULL ||
mddev->pers->check_reshape(mddev)) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
err = mddev->pers->start_reshape(mddev);
} else {
/*
* If reshape is still in progress, and
* md_check_recovery() can continue to reshape,
* don't restart reshape because data can be
* corrupted for raid456.
*/
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
mddev_unlock(mddev);
}
if (err)
return err;
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
} else {
if (cmd_match(page, "check"))
break;
case ACTION_CHECK:
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
else if (!cmd_match(page, "repair"))
return -EINVAL;
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
fallthrough;
case ACTION_REPAIR:
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
fallthrough;
case ACTION_RESYNC:
case ACTION_IDLE:
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
break;
default:
ret = -EINVAL;
goto out;
}
}

if (mddev->ro == MD_AUTO_READ) {
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}

set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
return len;
ret = len;

out:
mddev_unlock(mddev);
return ret;
}
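As a reading aid for the rewritten action_store() above: the store path now parses the sysfs string into an enum sync_action once, then branches on whether a sync thread is currently registered. The following is a small userspace model of just that dispatch; the return codes stand in for -EBUSY and -EINVAL, dispatch() is an invented name, and nothing here is kernel API.

#include <stdbool.h>

enum sync_action { ACTION_RESYNC, ACTION_RECOVER, ACTION_CHECK,
                   ACTION_REPAIR, ACTION_RESHAPE, ACTION_FROZEN,
                   ACTION_IDLE };

#define E_BUSY  (-1)
#define E_INVAL (-2)

/* Model of the two-level dispatch in action_store(). */
static int dispatch(enum sync_action action, bool sync_thread_running)
{
	if (sync_thread_running) {
		/* Only "frozen" and "idle" may act on a running thread. */
		switch (action) {
		case ACTION_FROZEN:
		case ACTION_IDLE:
			return 0;
		case ACTION_RESHAPE:
		case ACTION_RECOVER:
		case ACTION_CHECK:
		case ACTION_REPAIR:
		case ACTION_RESYNC:
			return E_BUSY;
		default:
			return E_INVAL;
		}
	}

	/* No thread registered: each action only arms the matching recovery
	 * flags and leaves it to md_check_recovery() to start the thread. */
	switch (action) {
	case ACTION_FROZEN:
	case ACTION_RESHAPE:
	case ACTION_RECOVER:
	case ACTION_CHECK:
	case ACTION_REPAIR:
	case ACTION_RESYNC:
	case ACTION_IDLE:
		return 0;
	default:
		return E_INVAL;
	}
}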
static struct md_sysfs_entry md_scan_mode =
@@ -5065,7 +5097,8 @@ __ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
static ssize_t
last_sync_action_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%s\n", mddev->last_sync_action);
return sprintf(page, "%s\n",
md_sync_action_name(mddev->last_sync_action));
}

static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
@@ -5755,14 +5788,20 @@ static const struct kobj_type md_ktype = {
int mdp_major = 0;

/* stack the limit for all rdevs into lim */
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim)
int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
unsigned int flags)
{
struct md_rdev *rdev;

rdev_for_each(rdev, mddev) {
queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
if ((flags & MDDEV_STACK_INTEGRITY) &&
!queue_limits_stack_integrity_bdev(lim, rdev->bdev))
return -EINVAL;
}

return 0;
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
@@ -5777,6 +5816,14 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
lim = queue_limits_start_update(mddev->gendisk->queue);
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);

if (!queue_limits_stack_integrity_bdev(&lim, rdev->bdev)) {
pr_err("%s: incompatible integrity profile for %pg\n",
mdname(mddev), rdev->bdev);
queue_limits_cancel_update(mddev->gendisk->queue);
return -ENXIO;
}

return queue_limits_commit_update(mddev->gendisk->queue, &lim);
}
EXPORT_SYMBOL_GPL(mddev_stack_new_rdev);
@@ -5806,6 +5853,14 @@ static void mddev_delayed_delete(struct work_struct *ws)
kobject_put(&mddev->kobj);
}

void md_init_stacking_limits(struct queue_limits *lim)
{
blk_set_stacking_limits(lim);
lim->features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
}
EXPORT_SYMBOL_GPL(md_init_stacking_limits);
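Putting md_init_stacking_limits() and the new mddev_stack_rdev_limits() signature together, a personality's limit setup after this series follows the pattern below. This is a sketch modelled on the raid0_set_limits()/raid1_set_limits() hunks further down in this diff; example_set_limits() is an invented name, not an additional kernel function.

/* Sketch: expected queue-limits flow for a personality after this series. */
static int example_set_limits(struct mddev *mddev)
{
	struct queue_limits lim;
	int err;

	md_init_stacking_limits(&lim);           /* stacking defaults + WC/FUA/IO_STAT/NOWAIT features */
	lim.io_min = mddev->chunk_sectors << 9;  /* personality-specific tuning goes here */

	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
	if (err) {
		queue_limits_cancel_update(mddev->gendisk->queue);
		return err;
	}
	return queue_limits_set(mddev->gendisk->queue, &lim);
}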
struct mddev *md_alloc(dev_t dev, char *name)
|
||||
{
|
||||
/*
|
||||
@ -5881,7 +5936,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
|
||||
disk->fops = &md_fops;
|
||||
disk->private_data = mddev;
|
||||
|
||||
blk_queue_write_cache(disk->queue, true, true);
|
||||
disk->events |= DISK_EVENT_MEDIA_CHANGE;
|
||||
mddev->gendisk = disk;
|
||||
error = add_disk(disk);
|
||||
@ -6185,28 +6239,6 @@ int md_run(struct mddev *mddev)
|
||||
}
|
||||
}
|
||||
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
struct request_queue *q = mddev->gendisk->queue;
|
||||
bool nonrot = true;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk >= 0 && !bdev_nonrot(rdev->bdev)) {
|
||||
nonrot = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (mddev->degraded)
|
||||
nonrot = false;
|
||||
if (nonrot)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
|
||||
|
||||
/* Set the NOWAIT flags if all underlying devices support it */
|
||||
if (nowait)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q);
|
||||
}
|
||||
if (pers->sync_request) {
|
||||
if (mddev->kobj.sd &&
|
||||
sysfs_create_group(&mddev->kobj, &md_redundancy_group))
|
||||
@ -6437,7 +6469,7 @@ void md_stop_writes(struct mddev *mddev)
|
||||
{
|
||||
mddev_lock_nointr(mddev);
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
stop_sync_thread(mddev, true, false);
|
||||
stop_sync_thread(mddev, true);
|
||||
__md_stop_writes(mddev);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
@ -6505,7 +6537,7 @@ static int md_set_readonly(struct mddev *mddev)
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, false, false);
|
||||
stop_sync_thread(mddev, false);
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
mddev_lock_nointr(mddev);
|
||||
@ -6551,7 +6583,7 @@ static int do_md_stop(struct mddev *mddev, int mode)
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
}
|
||||
|
||||
stop_sync_thread(mddev, true, false);
|
||||
stop_sync_thread(mddev, true);
|
||||
|
||||
if (mddev->sysfs_active ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
|
||||
@ -7165,15 +7197,6 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
|
||||
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
|
||||
if (!mddev->thread)
|
||||
md_update_sb(mddev, 1);
|
||||
/*
|
||||
* If the new disk does not support REQ_NOWAIT,
|
||||
* disable on the whole MD.
|
||||
*/
|
||||
if (!bdev_nowait(rdev->bdev)) {
|
||||
pr_info("%s: Disabling nowait because %pg does not support nowait\n",
|
||||
mdname(mddev), rdev->bdev);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, mddev->gendisk->queue);
|
||||
}
|
||||
/*
|
||||
* Kick recovery, maybe this spare has to be added to the
|
||||
* array immediately.
|
||||
@ -7742,12 +7765,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
|
||||
return get_bitmap_file(mddev, argp);
|
||||
}
|
||||
|
||||
if (cmd == HOT_REMOVE_DISK)
|
||||
/* need to ensure recovery thread has run */
|
||||
wait_event_interruptible_timeout(mddev->sb_wait,
|
||||
!test_bit(MD_RECOVERY_NEEDED,
|
||||
&mddev->recovery),
|
||||
msecs_to_jiffies(5000));
|
||||
if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
|
||||
/* Need to flush page cache, and ensure no-one else opens
|
||||
* and writes
|
||||
@ -8520,7 +8537,7 @@ int unregister_md_personality(struct md_personality *p)
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_md_personality);
|
||||
|
||||
int register_md_cluster_operations(struct md_cluster_operations *ops,
|
||||
int register_md_cluster_operations(const struct md_cluster_operations *ops,
|
||||
struct module *module)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -8641,12 +8658,12 @@ EXPORT_SYMBOL(md_done_sync);
|
||||
* A return value of 'false' means that the write wasn't recorded
|
||||
* and cannot proceed as the array is being suspend.
|
||||
*/
|
||||
bool md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
void md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
{
|
||||
int did_change = 0;
|
||||
|
||||
if (bio_data_dir(bi) != WRITE)
|
||||
return true;
|
||||
return;
|
||||
|
||||
BUG_ON(mddev->ro == MD_RDONLY);
|
||||
if (mddev->ro == MD_AUTO_READ) {
|
||||
@ -8679,15 +8696,9 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
if (did_change)
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_state);
|
||||
if (!mddev->has_superblocks)
|
||||
return true;
|
||||
return;
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
|
||||
is_md_suspended(mddev));
|
||||
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
|
||||
percpu_ref_put(&mddev->writes_pending);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
|
||||
}
|
||||
EXPORT_SYMBOL(md_write_start);
|
||||
|
||||
@ -8835,6 +8846,77 @@ void md_allow_write(struct mddev *mddev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_allow_write);
|
||||
|
||||
static sector_t md_sync_max_sectors(struct mddev *mddev,
|
||||
enum sync_action action)
|
||||
{
|
||||
switch (action) {
|
||||
case ACTION_RESYNC:
|
||||
case ACTION_CHECK:
|
||||
case ACTION_REPAIR:
|
||||
atomic64_set(&mddev->resync_mismatches, 0);
|
||||
fallthrough;
|
||||
case ACTION_RESHAPE:
|
||||
return mddev->resync_max_sectors;
|
||||
case ACTION_RECOVER:
|
||||
return mddev->dev_sectors;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
|
||||
{
|
||||
sector_t start = 0;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
switch (action) {
|
||||
case ACTION_CHECK:
|
||||
case ACTION_REPAIR:
|
||||
return mddev->resync_min;
|
||||
case ACTION_RESYNC:
|
||||
if (!mddev->bitmap)
|
||||
return mddev->recovery_cp;
|
||||
return 0;
|
||||
case ACTION_RESHAPE:
|
||||
/*
|
||||
* If the original node aborts reshaping then we continue the
|
||||
* reshaping, so set again to avoid restart reshape from the
|
||||
* first beginning
|
||||
*/
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
mddev->reshape_position != MaxSector)
|
||||
return mddev->reshape_position;
|
||||
return 0;
|
||||
case ACTION_RECOVER:
|
||||
start = MaxSector;
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(Journal, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < start)
|
||||
start = rdev->recovery_offset;
|
||||
rcu_read_unlock();
|
||||
|
||||
/* If there is a bitmap, we need to make sure all
|
||||
* writes that started before we added a spare
|
||||
* complete before we start doing a recovery.
|
||||
* Otherwise the write might complete and (via
|
||||
* bitmap_endwrite) set a bit in the bitmap after the
|
||||
* recovery has checked that bit and skipped that
|
||||
* region.
|
||||
*/
|
||||
if (mddev->bitmap) {
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
return start;
|
||||
default:
|
||||
return MaxSector;
|
||||
}
|
||||
}
|
||||
|
||||
#define SYNC_MARKS 10
|
||||
#define SYNC_MARK_STEP (3*HZ)
|
||||
#define UPDATE_FREQUENCY (5*60*HZ)
|
||||
@ -8851,7 +8933,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
sector_t last_check;
|
||||
int skipped = 0;
|
||||
struct md_rdev *rdev;
|
||||
char *desc, *action = NULL;
|
||||
enum sync_action action;
|
||||
const char *desc;
|
||||
struct blk_plug plug;
|
||||
int ret;
|
||||
|
||||
@ -8882,21 +8965,9 @@ void md_do_sync(struct md_thread *thread)
|
||||
goto skip;
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
|
||||
desc = "data-check";
|
||||
action = "check";
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
desc = "requested-resync";
|
||||
action = "repair";
|
||||
} else
|
||||
desc = "resync";
|
||||
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
desc = "reshape";
|
||||
else
|
||||
desc = "recovery";
|
||||
|
||||
mddev->last_sync_action = action ?: desc;
|
||||
action = md_sync_action(mddev);
|
||||
desc = md_sync_action_name(action);
|
||||
mddev->last_sync_action = action;
|
||||
|
||||
/*
|
||||
* Before starting a resync we must have set curr_resync to
|
||||
@ -8964,56 +9035,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
spin_unlock(&all_mddevs_lock);
|
||||
} while (mddev->curr_resync < MD_RESYNC_DELAYED);
|
||||
|
||||
j = 0;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
/* resync follows the size requested by the personality,
|
||||
* which defaults to physical size, but can be virtual size
|
||||
*/
|
||||
max_sectors = mddev->resync_max_sectors;
|
||||
atomic64_set(&mddev->resync_mismatches, 0);
|
||||
/* we don't use the checkpoint if there's a bitmap */
|
||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
j = mddev->resync_min;
|
||||
else if (!mddev->bitmap)
|
||||
j = mddev->recovery_cp;
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
|
||||
max_sectors = mddev->resync_max_sectors;
|
||||
/*
|
||||
* If the original node aborts reshaping then we continue the
|
||||
* reshaping, so set j again to avoid restart reshape from the
|
||||
* first beginning
|
||||
*/
|
||||
if (mddev_is_clustered(mddev) &&
|
||||
mddev->reshape_position != MaxSector)
|
||||
j = mddev->reshape_position;
|
||||
} else {
|
||||
/* recovery follows the physical size of devices */
|
||||
max_sectors = mddev->dev_sectors;
|
||||
j = MaxSector;
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(Journal, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
rdev->recovery_offset < j)
|
||||
j = rdev->recovery_offset;
|
||||
rcu_read_unlock();
|
||||
|
||||
/* If there is a bitmap, we need to make sure all
|
||||
* writes that started before we added a spare
|
||||
* complete before we start doing a recovery.
|
||||
* Otherwise the write might complete and (via
|
||||
* bitmap_endwrite) set a bit in the bitmap after the
|
||||
* recovery has checked that bit and skipped that
|
||||
* region.
|
||||
*/
|
||||
if (mddev->bitmap) {
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
}
|
||||
max_sectors = md_sync_max_sectors(mddev, action);
|
||||
j = md_sync_position(mddev, action);
|
||||
|
||||
pr_info("md: %s of RAID array %s\n", desc, mdname(mddev));
|
||||
pr_debug("md: minimum _guaranteed_ speed: %d KB/sec/disk.\n", speed_min(mddev));
|
||||
@ -9095,7 +9118,8 @@ void md_do_sync(struct md_thread *thread)
|
||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
|
||||
break;
|
||||
|
||||
sectors = mddev->pers->sync_request(mddev, j, &skipped);
|
||||
sectors = mddev->pers->sync_request(mddev, j, max_sectors,
|
||||
&skipped);
|
||||
if (sectors == 0) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
break;
|
||||
@ -9185,7 +9209,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
mddev->curr_resync_completed = mddev->curr_resync;
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_completed);
|
||||
}
|
||||
mddev->pers->sync_request(mddev, max_sectors, &skipped);
|
||||
mddev->pers->sync_request(mddev, max_sectors, max_sectors, &skipped);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
|
||||
mddev->curr_resync > MD_RESYNC_ACTIVE) {
|
||||
|
drivers/md/md.h (134 changed lines)
@@ -34,6 +34,61 @@
 */
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)

/* Status of sync thread. */
enum sync_action {
/*
* Represent by MD_RECOVERY_SYNC, start when:
* 1) after assemble, sync data from first rdev to other copies, this
* must be done first before other sync actions and will only execute
* once;
* 2) resize the array(notice that this is not reshape), sync data for
* the new range;
*/
ACTION_RESYNC,
/*
* Represent by MD_RECOVERY_RECOVER, start when:
* 1) for new replacement, sync data based on the replace rdev or
* available copies from other rdev;
* 2) for new member disk while the array is degraded, sync data from
* other rdev;
* 3) reassemble after power failure or re-add a hot removed rdev, sync
* data from first rdev to other copies based on bitmap;
*/
ACTION_RECOVER,
/*
* Represent by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
* MD_RECOVERY_CHECK, start when user echo "check" to sysfs api
* sync_action, used to check if data copies from differenct rdev are
* the same. The number of mismatch sectors will be exported to user
* by sysfs api mismatch_cnt;
*/
ACTION_CHECK,
/*
* Represent by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED, start when
* user echo "repair" to sysfs api sync_action, usually paired with
* ACTION_CHECK, used to force syncing data once user found that there
* are inconsistent data,
*/
ACTION_REPAIR,
/*
* Represent by MD_RECOVERY_RESHAPE, start when new member disk is added
* to the conf, notice that this is different from spares or
* replacement;
*/
ACTION_RESHAPE,
/*
* Represent by MD_RECOVERY_FROZEN, can be set by sysfs api sync_action
* or internal usage like setting the array read-only, will forbid above
* actions.
*/
ACTION_FROZEN,
/*
* All above actions don't match.
*/
ACTION_IDLE,
NR_SYNC_ACTIONS,
};
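The sysfs spellings that pair with these values are the long-standing sync_action strings; a lookup table along the following lines is presumably what md_sync_action_name() and md_sync_action_by_name() index. Sketch only, the table itself is not part of this hunk.

/* Sketch: familiar sysfs names, indexed by the enum above. */
static const char *const action_name[NR_SYNC_ACTIONS] = {
	[ACTION_RESYNC]  = "resync",
	[ACTION_RECOVER] = "recover",
	[ACTION_CHECK]   = "check",
	[ACTION_REPAIR]  = "repair",
	[ACTION_RESHAPE] = "reshape",
	[ACTION_FROZEN]  = "frozen",
	[ACTION_IDLE]    = "idle",
};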
/*
* The struct embedded in rdev is used to serialize IO.
*/
@@ -371,13 +426,12 @@ struct mddev {
struct md_thread __rcu *thread; /* management thread */
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */

/* 'last_sync_action' is initialized to "none". It is set when a
* sync operation (i.e "data-check", "requested-resync", "resync",
* "recovery", or "reshape") is started. It holds this value even
/*
* Set when a sync operation is started. It holds this value even
* when the sync thread is "frozen" (interrupted) or "idle" (stopped
* or finished). It is overwritten when a new sync operation is begun.
*/
char *last_sync_action;
enum sync_action last_sync_action;
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
@@ -540,8 +594,6 @@ struct mddev {
*/
struct list_head deleting;

/* Used to synchronize idle and frozen for action_store() */
struct mutex sync_mutex;
/* The sequence number for sync thread */
atomic_t sync_seq;

@@ -551,22 +603,46 @@ struct mddev {
};
enum recovery_flags {
/* flags for sync thread running status */

/*
* If neither SYNC or RESHAPE are set, then it is a recovery.
* set when one of sync action is set and new sync thread need to be
* registered, or just add/remove spares from conf.
*/
MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */
MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */
MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */
MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */
MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */
MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */
MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */
MD_RECOVERY_RESHAPE, /* A reshape is happening */
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
MD_RECOVERY_NEEDED,
/* sync thread is running, or about to be started */
MD_RECOVERY_RUNNING,
/* sync thread needs to be aborted for some reason */
MD_RECOVERY_INTR,
/* sync thread is done and is waiting to be unregistered */
MD_RECOVERY_DONE,
/* running sync thread must abort immediately, and not restart */
MD_RECOVERY_FROZEN,
/* waiting for pers->start() to finish */
MD_RECOVERY_WAIT,
/* interrupted because io-error */
MD_RECOVERY_ERROR,

/* flags determines sync action, see details in enum sync_action */

/* if just this flag is set, action is resync. */
MD_RECOVERY_SYNC,
/*
* paired with MD_RECOVERY_SYNC, if MD_RECOVERY_CHECK is not set,
* action is repair, means user requested resync.
*/
MD_RECOVERY_REQUESTED,
/*
* paired with MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED, action is
* check.
*/
MD_RECOVERY_CHECK,
/* recovery, or need to try it */
MD_RECOVERY_RECOVER,
/* reshape */
MD_RECOVERY_RESHAPE,
/* remote node is running resync thread */
MD_RESYNCING_REMOTE,
};
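For orientation, the documented flag combinations above can be decoded into an enum sync_action roughly as follows. This is a self-contained userspace model of the comments, not the kernel's md_sync_action(), whose exact check order may differ.

#include <stdio.h>

enum sync_action { ACTION_RESYNC, ACTION_RECOVER, ACTION_CHECK,
                   ACTION_REPAIR, ACTION_RESHAPE, ACTION_FROZEN,
                   ACTION_IDLE };

/* Stand-in bits for the recovery flags (the kernel uses test_bit() on
 * mddev->recovery; the values here are arbitrary). */
#define F_SYNC      (1u << 0)
#define F_REQUESTED (1u << 1)
#define F_CHECK     (1u << 2)
#define F_RECOVER   (1u << 3)
#define F_RESHAPE   (1u << 4)
#define F_FROZEN    (1u << 5)

static enum sync_action decode(unsigned int flags)
{
	if (flags & F_FROZEN)
		return ACTION_FROZEN;
	if (flags & F_RESHAPE)
		return ACTION_RESHAPE;
	if (flags & F_SYNC) {
		if (!(flags & F_REQUESTED))
			return ACTION_RESYNC;
		return (flags & F_CHECK) ? ACTION_CHECK : ACTION_REPAIR;
	}
	if (flags & F_RECOVER)
		return ACTION_RECOVER;
	return ACTION_IDLE;
}

int main(void)
{
	printf("%d\n", decode(F_SYNC | F_REQUESTED | F_CHECK)); /* 2 == ACTION_CHECK */
	printf("%d\n", decode(F_RECOVER));                      /* 1 == ACTION_RECOVER */
	return 0;
}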
enum md_ro_state {
@@ -653,7 +729,8 @@ struct md_personality
int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*spare_active) (struct mddev *mddev);
sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr,
sector_t max_sector, int *skipped);
int (*resize) (struct mddev *mddev, sector_t sectors);
sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (struct mddev *mddev);
@@ -772,7 +849,7 @@ static inline void safe_put_page(struct page *p)

extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
extern int register_md_cluster_operations(struct md_cluster_operations *ops,
extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
@@ -785,7 +862,10 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern enum sync_action md_sync_action(struct mddev *mddev);
extern enum sync_action md_sync_action_by_name(const char *page);
extern const char *md_sync_action_name(enum sync_action action);
extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
@@ -809,11 +889,11 @@ extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(struct mddev *mddev);
extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern int mddev_init(struct mddev *mddev);
extern void mddev_destroy(struct mddev *mddev);
void md_init_stacking_limits(struct queue_limits *lim);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
@@ -852,7 +932,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}

extern struct md_cluster_operations *md_cluster_ops;
extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
@@ -908,7 +988,9 @@ void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
int do_md_run(struct mddev *mddev);
void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim);
#define MDDEV_STACK_INTEGRITY (1u << 0)
int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
unsigned int flags);
int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev);
void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes);
@ -365,30 +365,30 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
|
||||
return array_sectors;
|
||||
}
|
||||
|
||||
static void free_conf(struct mddev *mddev, struct r0conf *conf)
|
||||
static void raid0_free(struct mddev *mddev, void *priv)
|
||||
{
|
||||
struct r0conf *conf = priv;
|
||||
|
||||
kfree(conf->strip_zone);
|
||||
kfree(conf->devlist);
|
||||
kfree(conf);
|
||||
}
|
||||
|
||||
static void raid0_free(struct mddev *mddev, void *priv)
|
||||
{
|
||||
struct r0conf *conf = priv;
|
||||
|
||||
free_conf(mddev, conf);
|
||||
}
|
||||
|
||||
static int raid0_set_limits(struct mddev *mddev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
int err;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
md_init_stacking_limits(&lim);
|
||||
lim.max_hw_sectors = mddev->chunk_sectors;
|
||||
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * mddev->raid_disks;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
|
||||
if (err) {
|
||||
queue_limits_cancel_update(mddev->gendisk->queue);
|
||||
return err;
|
||||
}
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
@ -415,7 +415,7 @@ static int raid0_run(struct mddev *mddev)
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid0_set_limits(mddev);
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* calculate array device size */
|
||||
@ -427,13 +427,7 @@ static int raid0_run(struct mddev *mddev)
|
||||
|
||||
dump_zones(mddev);
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret)
|
||||
goto out_free_conf;
|
||||
return 0;
|
||||
out_free_conf:
|
||||
free_conf(mddev, conf);
|
||||
return ret;
|
||||
return md_integrity_register(mddev);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1687,8 +1687,7 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
|
||||
if (bio_data_dir(bio) == READ)
|
||||
raid1_read_request(mddev, bio, sectors, NULL);
|
||||
else {
|
||||
if (!md_write_start(mddev,bio))
|
||||
return false;
|
||||
md_write_start(mddev,bio);
|
||||
raid1_write_request(mddev, bio, sectors);
|
||||
}
|
||||
return true;
|
||||
@ -1907,9 +1906,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (mddev->recovery_disabled == conf->recovery_disabled)
|
||||
return -EBUSY;
|
||||
|
||||
if (md_integrity_add_rdev(rdev, mddev))
|
||||
return -ENXIO;
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
@ -2757,12 +2753,12 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
|
||||
*/
|
||||
|
||||
static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
int *skipped)
|
||||
sector_t max_sector, int *skipped)
|
||||
{
|
||||
struct r1conf *conf = mddev->private;
|
||||
struct r1bio *r1_bio;
|
||||
struct bio *bio;
|
||||
sector_t max_sector, nr_sectors;
|
||||
sector_t nr_sectors;
|
||||
int disk = -1;
|
||||
int i;
|
||||
int wonly = -1;
|
||||
@ -2778,7 +2774,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
if (init_resync(conf))
|
||||
return 0;
|
||||
|
||||
max_sector = mddev->dev_sectors;
|
||||
if (sector_nr >= max_sector) {
|
||||
/* If we aborted, we need to abort the
|
||||
* sync on the 'current' bitmap chunk (there will
|
||||
@ -3197,14 +3192,18 @@ static struct r1conf *setup_conf(struct mddev *mddev)
|
||||
static int raid1_set_limits(struct mddev *mddev)
|
||||
{
|
||||
struct queue_limits lim;
|
||||
int err;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
md_init_stacking_limits(&lim);
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
|
||||
if (err) {
|
||||
queue_limits_cancel_update(mddev->gendisk->queue);
|
||||
return err;
|
||||
}
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
static void raid1_free(struct mddev *mddev, void *priv);
|
||||
static int raid1_run(struct mddev *mddev)
|
||||
{
|
||||
struct r1conf *conf;
|
||||
@ -3238,7 +3237,7 @@ static int raid1_run(struct mddev *mddev)
|
||||
if (!mddev_is_dm(mddev)) {
|
||||
ret = raid1_set_limits(mddev);
|
||||
if (ret)
|
||||
goto abort;
|
||||
return ret;
|
||||
}
|
||||
|
||||
mddev->degraded = 0;
|
||||
@ -3252,8 +3251,7 @@ static int raid1_run(struct mddev *mddev)
|
||||
*/
|
||||
if (conf->raid_disks - mddev->degraded < 1) {
|
||||
md_unregister_thread(mddev, &conf->thread);
|
||||
ret = -EINVAL;
|
||||
goto abort;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (conf->raid_disks - mddev->degraded == 1)
|
||||
@ -3277,14 +3275,8 @@ static int raid1_run(struct mddev *mddev)
|
||||
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
|
||||
|
||||
ret = md_integrity_register(mddev);
|
||||
if (ret) {
|
||||
if (ret)
|
||||
md_unregister_thread(mddev, &mddev->thread);
|
||||
goto abort;
|
||||
}
|
||||
return 0;
|
||||
|
||||
abort:
|
||||
raid1_free(mddev, conf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1836,8 +1836,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
|
||||
&& md_flush_request(mddev, bio))
|
||||
return true;
|
||||
|
||||
if (!md_write_start(mddev, bio))
|
||||
return false;
|
||||
md_write_start(mddev, bio);
|
||||
|
||||
if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
|
||||
if (!raid10_handle_discard(mddev, bio))
|
||||
@ -2083,9 +2082,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
|
||||
return -EINVAL;
|
||||
|
||||
if (md_integrity_add_rdev(rdev, mddev))
|
||||
return -ENXIO;
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
first = last = rdev->raid_disk;
|
||||
|
||||
@ -3140,12 +3136,12 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
|
||||
*/
|
||||
|
||||
static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
int *skipped)
|
||||
sector_t max_sector, int *skipped)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct r10bio *r10_bio;
|
||||
struct bio *biolist = NULL, *bio;
|
||||
sector_t max_sector, nr_sectors;
|
||||
sector_t nr_sectors;
|
||||
int i;
|
||||
int max_sync;
|
||||
sector_t sync_blocks;
|
||||
@ -3175,10 +3171,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
return 0;
|
||||
|
||||
skipped:
|
||||
max_sector = mddev->dev_sectors;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
max_sector = mddev->resync_max_sectors;
|
||||
if (sector_nr >= max_sector) {
|
||||
conf->cluster_sync_low = 0;
|
||||
conf->cluster_sync_high = 0;
|
||||
@ -3980,12 +3972,17 @@ static int raid10_set_queue_limits(struct mddev *mddev)
|
||||
{
|
||||
struct r10conf *conf = mddev->private;
|
||||
struct queue_limits lim;
|
||||
int err;
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
md_init_stacking_limits(&lim);
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
|
||||
if (err) {
|
||||
queue_limits_cancel_update(mddev->gendisk->queue);
|
||||
return err;
|
||||
}
|
||||
return queue_limits_set(mddev->gendisk->queue, &lim);
|
||||
}
|
||||
|
||||
|
@ -5899,6 +5899,39 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum reshape_loc {
|
||||
LOC_NO_RESHAPE,
|
||||
LOC_AHEAD_OF_RESHAPE,
|
||||
LOC_INSIDE_RESHAPE,
|
||||
LOC_BEHIND_RESHAPE,
|
||||
};
|
||||
|
||||
static enum reshape_loc get_reshape_loc(struct mddev *mddev,
|
||||
struct r5conf *conf, sector_t logical_sector)
|
||||
{
|
||||
sector_t reshape_progress, reshape_safe;
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
reshape_progress = conf->reshape_progress;
|
||||
reshape_safe = conf->reshape_safe;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (reshape_progress == MaxSector)
|
||||
return LOC_NO_RESHAPE;
|
||||
if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
|
||||
return LOC_AHEAD_OF_RESHAPE;
|
||||
if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
|
||||
return LOC_INSIDE_RESHAPE;
|
||||
return LOC_BEHIND_RESHAPE;
|
||||
}
|
||||
|
||||
static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
struct r5conf *conf, struct stripe_request_ctx *ctx,
|
||||
sector_t logical_sector, struct bio *bi)
|
||||
@ -5913,28 +5946,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
|
||||
seq = read_seqcount_begin(&conf->gen_lock);
|
||||
|
||||
if (unlikely(conf->reshape_progress != MaxSector)) {
|
||||
/*
|
||||
* Spinlock is needed as reshape_progress may be
|
||||
* 64bit on a 32bit platform, and so it might be
|
||||
* possible to see a half-updated value
|
||||
* Of course reshape_progress could change after
|
||||
* the lock is dropped, so once we get a reference
|
||||
* to the stripe that we think it is, we will have
|
||||
* to check again.
|
||||
*/
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_progress)) {
|
||||
previous = 1;
|
||||
} else {
|
||||
if (ahead_of_reshape(mddev, logical_sector,
|
||||
conf->reshape_safe)) {
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
enum reshape_loc loc = get_reshape_loc(mddev, conf,
|
||||
logical_sector);
|
||||
if (loc == LOC_INSIDE_RESHAPE) {
|
||||
ret = STRIPE_SCHEDULE_AND_RETRY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
if (loc == LOC_AHEAD_OF_RESHAPE)
|
||||
previous = 1;
|
||||
}
|
||||
|
||||
new_sector = raid5_compute_sector(conf, logical_sector, previous,
|
||||
@ -6078,8 +6097,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
|
||||
}
|
||||
|
||||
if (!md_write_start(mddev, bi))
|
||||
return false;
|
||||
md_write_start(mddev, bi);
|
||||
/*
|
||||
* If array is degraded, better not do chunk aligned read because
|
||||
* later we might have to read it again in order to reconstruct
|
||||
@ -6113,8 +6131,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
|
||||
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
|
||||
if ((bi->bi_opf & REQ_NOWAIT) &&
|
||||
(conf->reshape_progress != MaxSector) &&
|
||||
!ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
|
||||
ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
|
||||
get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
|
||||
bio_wouldblock_error(bi);
|
||||
if (rw == WRITE)
|
||||
md_write_end(mddev);
|
||||
@ -6255,7 +6272,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
safepos = conf->reshape_safe;
|
||||
sector_div(safepos, data_disks);
|
||||
if (mddev->reshape_backwards) {
|
||||
BUG_ON(writepos < reshape_sectors);
|
||||
if (WARN_ON(writepos < reshape_sectors))
|
||||
return MaxSector;
|
||||
|
||||
writepos -= reshape_sectors;
|
||||
readpos += reshape_sectors;
|
||||
safepos += reshape_sectors;
|
||||
@ -6273,14 +6292,18 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
|
||||
* to set 'stripe_addr' which is where we will write to.
|
||||
*/
|
||||
if (mddev->reshape_backwards) {
|
||||
BUG_ON(conf->reshape_progress == 0);
|
||||
if (WARN_ON(conf->reshape_progress == 0))
|
||||
return MaxSector;
|
||||
|
||||
stripe_addr = writepos;
|
||||
BUG_ON((mddev->dev_sectors &
|
||||
~((sector_t)reshape_sectors - 1))
|
||||
- reshape_sectors - stripe_addr
|
||||
!= sector_nr);
|
||||
if (WARN_ON((mddev->dev_sectors &
|
||||
~((sector_t)reshape_sectors - 1)) -
|
||||
reshape_sectors - stripe_addr != sector_nr))
|
||||
return MaxSector;
|
||||
} else {
|
||||
BUG_ON(writepos != sector_nr + reshape_sectors);
|
||||
if (WARN_ON(writepos != sector_nr + reshape_sectors))
|
||||
return MaxSector;
|
||||
|
||||
stripe_addr = sector_nr;
|
||||
}
|
||||
|
||||
@ -6458,11 +6481,10 @@ ret:
|
||||
}
|
||||
|
||||
static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
|
||||
int *skipped)
|
||||
sector_t max_sector, int *skipped)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct stripe_head *sh;
|
||||
sector_t max_sector = mddev->dev_sectors;
|
||||
sector_t sync_blocks;
|
||||
int still_degraded = 0;
|
||||
int i;
|
||||
@ -7082,12 +7104,14 @@ raid5_store_skip_copy(struct mddev *mddev, const char *page, size_t len)
|
||||
err = -ENODEV;
|
||||
else if (new != conf->skip_copy) {
|
||||
struct request_queue *q = mddev->gendisk->queue;
|
||||
struct queue_limits lim = queue_limits_start_update(q);
|
||||
|
||||
conf->skip_copy = new;
|
||||
if (new)
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
lim.features |= BLK_FEAT_STABLE_WRITES;
|
||||
else
|
||||
blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q);
|
||||
lim.features &= ~BLK_FEAT_STABLE_WRITES;
|
||||
err = queue_limits_commit_update(q, &lim);
|
||||
}
|
||||
mddev_unlock_and_resume(mddev);
|
||||
return err ?: len;
|
||||
@ -7562,11 +7586,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (test_bit(Replacement, &rdev->flags)) {
|
||||
if (disk->replacement)
|
||||
goto abort;
|
||||
RCU_INIT_POINTER(disk->replacement, rdev);
|
||||
disk->replacement = rdev;
|
||||
} else {
|
||||
if (disk->rdev)
|
||||
goto abort;
|
||||
RCU_INIT_POINTER(disk->rdev, rdev);
|
||||
disk->rdev = rdev;
|
||||
}
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags)) {
|
||||
@ -7702,13 +7726,13 @@ static int raid5_set_limits(struct mddev *mddev)
|
||||
*/
|
||||
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
|
||||
|
||||
blk_set_stacking_limits(&lim);
|
||||
md_init_stacking_limits(&lim);
|
||||
lim.io_min = mddev->chunk_sectors << 9;
|
||||
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
|
||||
lim.raid_partial_stripes_expensive = 1;
|
||||
lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
|
||||
lim.discard_granularity = stripe;
|
||||
lim.max_write_zeroes_sectors = 0;
|
||||
mddev_stack_rdev_limits(mddev, &lim);
|
||||
mddev_stack_rdev_limits(mddev, &lim, 0);
|
||||
rdev_for_each(rdev, mddev)
|
||||
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
|
||||
mddev->gendisk->disk_name);
|
||||
@ -8062,15 +8086,13 @@ static void print_raid5_conf (struct r5conf *conf)
|
||||
conf->raid_disks,
|
||||
conf->raid_disks - conf->mddev->degraded);
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
rdev = conf->disks[i].rdev;
|
||||
if (rdev)
|
||||
pr_debug(" disk %d, o:%d, dev:%pg\n",
|
||||
i, !test_bit(Faulty, &rdev->flags),
|
||||
rdev->bdev);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int raid5_spare_active(struct mddev *mddev)
|
||||
|
@ -2466,8 +2466,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
struct mmc_blk_data *md;
|
||||
int devidx, ret;
|
||||
char cap_str[10];
|
||||
bool cache_enabled = false;
|
||||
bool fua_enabled = false;
|
||||
unsigned int features = 0;
|
||||
|
||||
devidx = ida_alloc_max(&mmc_blk_ida, max_devices - 1, GFP_KERNEL);
|
||||
if (devidx < 0) {
|
||||
@ -2499,7 +2498,24 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
*/
|
||||
md->read_only = mmc_blk_readonly(card);
|
||||
|
||||
md->disk = mmc_init_queue(&md->queue, card);
|
||||
if (mmc_host_cmd23(card->host)) {
|
||||
if ((mmc_card_mmc(card) &&
|
||||
card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
|
||||
(mmc_card_sd(card) &&
|
||||
card->scr.cmds & SD_SCR_CMD23_SUPPORT))
|
||||
md->flags |= MMC_BLK_CMD23;
|
||||
}
|
||||
|
||||
if (md->flags & MMC_BLK_CMD23 &&
|
||||
((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
|
||||
card->ext_csd.rel_sectors)) {
|
||||
md->flags |= MMC_BLK_REL_WR;
|
||||
features |= (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA);
|
||||
} else if (mmc_cache_enabled(card->host)) {
|
||||
features |= BLK_FEAT_WRITE_CACHE;
|
||||
}
|
||||
|
||||
md->disk = mmc_init_queue(&md->queue, card, features);
|
||||
if (IS_ERR(md->disk)) {
|
||||
ret = PTR_ERR(md->disk);
|
||||
goto err_kfree;
|
||||
@ -2539,26 +2555,6 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card,
|
||||
|
||||
set_capacity(md->disk, size);
|
||||
|
||||
if (mmc_host_cmd23(card->host)) {
|
||||
if ((mmc_card_mmc(card) &&
|
||||
card->csd.mmca_vsn >= CSD_SPEC_VER_3) ||
|
||||
(mmc_card_sd(card) &&
|
||||
card->scr.cmds & SD_SCR_CMD23_SUPPORT))
|
||||
md->flags |= MMC_BLK_CMD23;
|
||||
}
|
||||
|
||||
if (md->flags & MMC_BLK_CMD23 &&
|
||||
((card->ext_csd.rel_param & EXT_CSD_WR_REL_PARAM_EN) ||
|
||||
card->ext_csd.rel_sectors)) {
|
||||
md->flags |= MMC_BLK_REL_WR;
|
||||
fua_enabled = true;
|
||||
cache_enabled = true;
|
||||
}
|
||||
if (mmc_cache_enabled(card->host))
|
||||
cache_enabled = true;
|
||||
|
||||
blk_queue_write_cache(md->queue.queue, cache_enabled, fua_enabled);
|
||||
|
||||
string_get_size((u64)size, 512, STRING_UNITS_2,
|
||||
cap_str, sizeof(cap_str));
|
||||
pr_info("%s: %s %s %s%s\n",
|
||||
|
@ -344,10 +344,12 @@ static const struct blk_mq_ops mmc_mq_ops = {
|
||||
};
|
||||
|
||||
static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
|
||||
struct mmc_card *card)
|
||||
struct mmc_card *card, unsigned int features)
|
||||
{
|
||||
struct mmc_host *host = card->host;
|
||||
struct queue_limits lim = { };
|
||||
struct queue_limits lim = {
|
||||
.features = features,
|
||||
};
|
||||
struct gendisk *disk;
|
||||
|
||||
if (mmc_can_erase(card))
|
||||
@ -376,18 +378,16 @@ static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
|
||||
lim.max_segments = host->max_segs;
|
||||
}
|
||||
|
||||
if (mmc_host_is_spi(host) && host->use_spi_crc)
|
||||
lim.features |= BLK_FEAT_STABLE_WRITES;
|
||||
|
||||
disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
|
||||
if (IS_ERR(disk))
|
||||
return disk;
|
||||
mq->queue = disk->queue;
|
||||
|
||||
if (mmc_host_is_spi(host) && host->use_spi_crc)
|
||||
blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
|
||||
blk_queue_rq_timeout(mq->queue, 60 * HZ);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
|
||||
|
||||
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
|
||||
|
||||
INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
|
||||
@ -413,10 +413,12 @@ static inline bool mmc_merge_capable(struct mmc_host *host)
|
||||
* mmc_init_queue - initialise a queue structure.
|
||||
* @mq: mmc queue
|
||||
* @card: mmc card to attach this queue
|
||||
* @features: block layer features (BLK_FEAT_*)
|
||||
*
|
||||
* Initialise a MMC card request queue.
|
||||
*/
|
||||
struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
|
||||
struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
|
||||
unsigned int features)
|
||||
{
|
||||
struct mmc_host *host = card->host;
|
||||
struct gendisk *disk;
|
||||
@ -460,7 +462,7 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
|
||||
disk = mmc_alloc_disk(mq, card);
|
||||
disk = mmc_alloc_disk(mq, card, features);
|
||||
if (IS_ERR(disk))
|
||||
blk_mq_free_tag_set(&mq->tag_set);
|
||||
return disk;
|
||||
|
@ -94,7 +94,8 @@ struct mmc_queue {
|
||||
struct work_struct complete_work;
|
||||
};
|
||||
|
||||
struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card);
|
||||
struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
|
||||
unsigned int features);
|
||||
extern void mmc_cleanup_queue(struct mmc_queue *);
|
||||
extern void mmc_queue_suspend(struct mmc_queue *);
|
||||
extern void mmc_queue_resume(struct mmc_queue *);
|
||||
|
@ -336,6 +336,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
lim.logical_block_size = tr->blksize;
|
||||
if (tr->discard)
|
||||
lim.max_hw_discard_sectors = UINT_MAX;
|
||||
if (tr->flush)
|
||||
lim.features |= BLK_FEAT_WRITE_CACHE;
|
||||
|
||||
/* Create gendisk */
|
||||
gd = blk_mq_alloc_disk(new->tag_set, &lim, new);
|
||||
@ -372,13 +374,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
|
||||
/* Create the request queue */
|
||||
spin_lock_init(&new->queue_lock);
|
||||
INIT_LIST_HEAD(&new->rq_list);
|
||||
|
||||
if (tr->flush)
|
||||
blk_queue_write_cache(new->rq, true, false);
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, new->rq);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, new->rq);
|
||||
|
||||
gd->queue = new->rq;
|
||||
|
||||
if (new->readonly)
|
||||
|
@ -1501,9 +1501,15 @@ static int btt_blk_init(struct btt *btt)
|
||||
.logical_block_size = btt->sector_size,
|
||||
.max_hw_sectors = UINT_MAX,
|
||||
.max_integrity_segments = 1,
|
||||
.features = BLK_FEAT_SYNCHRONOUS,
|
||||
};
|
||||
int rc;
|
||||
|
||||
if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
|
||||
lim.integrity.tuple_size = btt_meta_size(btt);
|
||||
lim.integrity.tag_size = btt_meta_size(btt);
|
||||
}
|
||||
|
||||
btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
|
||||
if (IS_ERR(btt->btt_disk))
|
||||
return PTR_ERR(btt->btt_disk);
|
||||
@ -1513,17 +1519,6 @@ static int btt_blk_init(struct btt *btt)
|
||||
btt->btt_disk->fops = &btt_fops;
|
||||
btt->btt_disk->private_data = btt;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue);
|
||||
|
||||
if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) {
|
||||
struct blk_integrity bi = {
|
||||
.tuple_size = btt_meta_size(btt),
|
||||
.tag_size = btt_meta_size(btt),
|
||||
};
|
||||
blk_integrity_register(btt->btt_disk, &bi);
|
||||
}
|
||||
|
||||
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
|
||||
rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
|
||||
if (rc)
|
||||
|
@ -455,6 +455,8 @@ static int pmem_attach_disk(struct device *dev,
|
||||
.logical_block_size = pmem_sector_size(ndns),
|
||||
.physical_block_size = PAGE_SIZE,
|
||||
.max_hw_sectors = UINT_MAX,
|
||||
.features = BLK_FEAT_WRITE_CACHE |
|
||||
BLK_FEAT_SYNCHRONOUS,
|
||||
};
|
||||
int nid = dev_to_node(dev), fua;
|
||||
struct resource *res = &nsio->res;
|
||||
@ -463,7 +465,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
struct dax_device *dax_dev;
|
||||
struct nd_pfn_sb *pfn_sb;
|
||||
struct pmem_device *pmem;
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
void *addr;
|
||||
int rc;
|
||||
@ -495,6 +496,10 @@ static int pmem_attach_disk(struct device *dev,
|
||||
dev_warn(dev, "unable to guarantee persistence of writes\n");
|
||||
fua = 0;
|
||||
}
|
||||
if (fua)
|
||||
lim.features |= BLK_FEAT_FUA;
|
||||
if (is_nd_pfn(dev))
|
||||
lim.features |= BLK_FEAT_DAX;
|
||||
|
||||
if (!devm_request_mem_region(dev, res->start, resource_size(res),
|
||||
dev_name(&ndns->dev))) {
|
||||
@ -505,7 +510,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
disk = blk_alloc_disk(&lim, nid);
|
||||
if (IS_ERR(disk))
|
||||
return PTR_ERR(disk);
|
||||
q = disk->queue;
|
||||
|
||||
pmem->disk = disk;
|
||||
pmem->pgmap.owner = pmem;
|
||||
@ -543,12 +547,6 @@ static int pmem_attach_disk(struct device *dev,
|
||||
}
|
||||
pmem->virt_addr = addr;
|
||||
|
||||
blk_queue_write_cache(q, true, fua);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
|
||||
blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, q);
|
||||
if (pmem->pfn_flags & PFN_MAP)
|
||||
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
|
||||
|
||||
disk->fops = &pmem_fops;
|
||||
disk->private_data = pmem;
|
||||
nvdimm_namespace_disk_name(ndns, disk->disk_name);
|
||||
|
@ -1,7 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config NVME_CORE
|
||||
tristate
|
||||
select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
|
||||
|
||||
config BLK_DEV_NVME
|
||||
tristate "NVM Express block device"
|
||||
|
Some files were not shown because too many files have changed in this diff.