From 2569063c7140c65a0d0ad075e95ddfbcda9ba3c0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 27 Dec 2020 19:34:58 +0800 Subject: [PATCH 01/12] blk-mq: test QUEUE_FLAG_HCTX_ACTIVE for sbitmap_shared in hctx_may_queue In case of blk_mq_is_sbitmap_shared(), we should test QUEUE_FLAG_HCTX_ACTIVE against q->queue_flags instead of BLK_MQ_S_TAG_ACTIVE. So fix it. Cc: John Garry Cc: Kashyap Desai Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap") Signed-off-by: Ming Lei Reviewed-by: John Garry Signed-off-by: Jens Axboe --- block/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.h b/block/blk-mq.h index c1458d9502f1..3616453ca28c 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -304,7 +304,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, struct request_queue *q = hctx->queue; struct blk_mq_tag_set *set = q->tag_set; - if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) return true; users = atomic_read(&set->active_queues_shared_sbitmap); } else { From ef49d40b61a3e18a11edd5eb1c30b0183af9e850 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 17 Jan 2021 16:50:17 +0800 Subject: [PATCH 02/12] block: Fix an error handling in add_partition Once we have called device_initialize(), we should use put_device() to give up the reference on error, just like what we have done on failure of device_add(). Signed-off-by: Dinghao Liu Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/partitions/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/core.c b/block/partitions/core.c index e7d776db803b..23460cee9de5 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -384,7 +384,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, err = blk_alloc_devt(bdev, &devt); if (err) - goto out_bdput; + goto out_put; pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ From ac55ad2b5fadb6af8826963d7d3331c9950a2608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Mon, 18 Jan 2021 17:55:18 +0100 Subject: [PATCH 03/12] s390/dasd: Fix inconsistent kobject removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our intention was to only remove path kobjects whenever a device is being set offline. However, one corner case was missing. If a device is disabled and enabled (using the IOCTLs BIODASDDISABLE and BIODASDENABLE respectively), the enabling process will call dasd_eckd_reload_device() which itself calls dasd_eckd_read_conf() in order to update path information. During that update, dasd_eckd_clear_conf_data() clears all old data and also removes all kobjects. This will leave us with an inconsistent state of path kobjects and a subsequent path verification leads to a failing kobject creation. Fix this by removing kobjects only in the context of offlining a device as initially intended. Fixes: 19508b204740 ("s390/dasd: Display FC Endpoint Security information via sysfs") Reported-by: Stefan Haberland Signed-off-by: Jan Höppner Reviewed-by: Stefan Haberland Reviewed-by: Cornelia Huck Signed-off-by: Jens Axboe --- drivers/s390/block/dasd_devmap.c | 20 ++++++++++++++------ drivers/s390/block/dasd_eckd.c | 3 ++- drivers/s390/block/dasd_int.h | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 16bb135c20aa..03d27ee9cac6 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -1874,18 +1874,26 @@ void dasd_path_create_kobjects(struct dasd_device *device) } EXPORT_SYMBOL(dasd_path_create_kobjects); -/* - * As we keep kobjects for the lifetime of a device, this function must not be - * called anywhere but in the context of offlining a device. - */ -void dasd_path_remove_kobj(struct dasd_device *device, int chp) +static void dasd_path_remove_kobj(struct dasd_device *device, int chp) { if (device->path[chp].in_sysfs) { kobject_put(&device->path[chp].kobj); device->path[chp].in_sysfs = false; } } -EXPORT_SYMBOL(dasd_path_remove_kobj); + +/* + * As we keep kobjects for the lifetime of a device, this function must not be + * called anywhere but in the context of offlining a device. + */ +void dasd_path_remove_kobjects(struct dasd_device *device) +{ + int i; + + for (i = 0; i < 8; i++) + dasd_path_remove_kobj(device, i); +} +EXPORT_SYMBOL(dasd_path_remove_kobjects); int dasd_add_sysfs_files(struct ccw_device *cdev) { diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 3caa1ee5f4b0..65eb87cbbb9b 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1036,7 +1036,6 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device) device->path[i].ssid = 0; device->path[i].chpid = 0; dasd_path_notoper(device, i); - dasd_path_remove_kobj(device, i); } } @@ -2173,6 +2172,7 @@ out_err2: device->block = NULL; out_err1: dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); kfree(device->private); device->private = NULL; return rc; @@ -2191,6 +2191,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) private->vdsneq = NULL; private->gneq = NULL; dasd_eckd_clear_conf_data(device); + dasd_path_remove_kobjects(device); } static struct dasd_ccw_req * diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 3bc008f9136c..b8a04c42d1d2 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -858,7 +858,7 @@ int dasd_add_sysfs_files(struct ccw_device *); void dasd_remove_sysfs_files(struct ccw_device *); void dasd_path_create_kobj(struct dasd_device *, int); void dasd_path_create_kobjects(struct dasd_device *); -void dasd_path_remove_kobj(struct dasd_device *, int); +void dasd_path_remove_kobjects(struct dasd_device *); struct dasd_device *dasd_device_from_cdev(struct ccw_device *); struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *); From b98e762e3d71e893b221f871825dc64694cfb258 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 12:21:02 -0500 Subject: [PATCH 04/12] nbd: freeze the queue while we're adding connections When setting up a device, we can krealloc the config->socks array to add new sockets to the configuration. However if we happen to get a IO request in at this point even though we aren't setup we could hit a UAF, as we deref config->socks without any locking, assuming that the configuration was setup already and that ->socks is safe to access it as we have a reference on the configuration. But there's nothing really preventing IO from occurring at this point of the device setup, we don't want to incur the overhead of a lock to access ->socks when it will never change while the device is running. To fix this UAF scenario simply freeze the queue if we are adding sockets. This will protect us from this particular case without adding any additional overhead for the normal running case. Cc: stable@vger.kernel.org Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 6727358e147d..e6ea5d344f87 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1022,6 +1022,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, if (!sock) return err; + /* + * We need to make sure we don't get any errant requests while we're + * reallocating the ->socks array. + */ + blk_mq_freeze_queue(nbd->disk->queue); + if (!netlink && !nbd->task_setup && !test_bit(NBD_RT_BOUND, &config->runtime_flags)) nbd->task_setup = current; @@ -1060,10 +1066,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, nsock->cookie = 0; socks[config->num_connections++] = nsock; atomic_inc(&config->live_connections); + blk_mq_unfreeze_queue(nbd->disk->queue); return 0; put_socket: + blk_mq_unfreeze_queue(nbd->disk->queue); sockfd_put(sock); return err; } From 8dc932d3e8afb65e12eba7495f046c83884c49bf Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Tue, 26 Jan 2021 21:59:07 +0200 Subject: [PATCH 05/12] Revert "block: simplify set_init_blocksize" to regain lost performance The cited commit introduced a serious regression with SATA write speed, as found by bisecting. This patch reverts this commit, which restores write speed back to the values observed before this commit. The performance tests were done on a Helios4 NAS (2nd batch) with 4 HDDs (WD8003FFBX) using dd (bs=1M count=2000). "Direct" is a test with a single HDD, the rest are different RAID levels built over the first partitions of 4 HDDs. Test results are in MB/s, R is read, W is write. | Direct | RAID0 | RAID10 f2 | RAID10 n2 | RAID6 ----------------+--------+-------+-----------+-----------+-------- 9011495c9466 | R:256 | R:313 | R:276 | R:313 | R:323 (before faulty) | W:254 | W:253 | W:195 | W:204 | W:117 ----------------+--------+-------+-----------+-----------+-------- 5ff9f19231a0 | R:257 | R:398 | R:312 | R:344 | R:391 (faulty commit) | W:154 | W:122 | W:67.7 | W:66.6 | W:67.2 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:256 | R:401 | R:312 | R:356 | R:375 unpatched | W:149 | W:123 | W:64 | W:64.1 | W:61.5 ----------------+--------+-------+-----------+-----------+-------- 5.10.10 | R:255 | R:396 | R:312 | R:340 | R:393 patched | W:247 | W:274 | W:220 | W:225 | W:121 Applying this patch doesn't hurt read performance, while improves the write speed by 1.5x - 3.5x (more impact on RAID tests). The write speed is restored back to the state before the faulty commit, and even a bit higher in RAID tests (which aren't HDD-bound on this device) - that is likely related to other optimizations done between the faulty commit and 5.10.10 which also improved the read speed. Signed-off-by: Maxim Mikityanskiy Fixes: 5ff9f19231a0 ("block: simplify set_init_blocksize") Cc: Christoph Hellwig Cc: Jens Axboe Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/block_dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 3b8963e228a1..235b5042672e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -130,7 +130,15 @@ EXPORT_SYMBOL(truncate_bdev_range); static void set_init_blocksize(struct block_device *bdev) { - bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev)); + unsigned int bsize = bdev_logical_block_size(bdev); + loff_t size = i_size_read(bdev->bd_inode); + + while (bsize < PAGE_SIZE) { + if (size & bsize) + break; + bsize <<= 1; + } + bdev->bd_inode->i_blkbits = blksize_bits(bsize); } int set_blocksize(struct block_device *bdev, int size) From 6c635caef410aa757befbd8857c1eadde5cc22ed Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 28 Jan 2021 13:58:15 +0800 Subject: [PATCH 06/12] blk-cgroup: Use cond_resched() when destroy blkgs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On !PREEMPT kernel, we can get below softlockup when doing stress testing with creating and destroying block cgroup repeatly. The reason is it may take a long time to acquire the queue's lock in the loop of blkcg_destroy_blkgs(), or the system can accumulate a huge number of blkgs in pathological cases. We can add a need_resched() check on each loop and release locks and do cond_resched() if true to avoid this issue, since the blkcg_destroy_blkgs() is not called from atomic contexts. [ 4757.010308] watchdog: BUG: soft lockup - CPU#11 stuck for 94s! [ 4757.010698] Call trace: [ 4757.010700]  blkcg_destroy_blkgs+0x68/0x150 [ 4757.010701]  cgwb_release_workfn+0x104/0x158 [ 4757.010702]  process_one_work+0x1bc/0x3f0 [ 4757.010704]  worker_thread+0x164/0x468 [ 4757.010705]  kthread+0x108/0x138 Suggested-by: Tejun Heo Signed-off-by: Baolin Wang Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 031114d454a6..4221a1539391 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1016,6 +1016,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) */ void blkcg_destroy_blkgs(struct blkcg *blkcg) { + might_sleep(); + spin_lock_irq(&blkcg->lock); while (!hlist_empty(&blkcg->blkg_list)) { @@ -1023,14 +1025,20 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg) struct blkcg_gq, blkcg_node); struct request_queue *q = blkg->q; - if (spin_trylock(&q->queue_lock)) { - blkg_destroy(blkg); - spin_unlock(&q->queue_lock); - } else { + if (need_resched() || !spin_trylock(&q->queue_lock)) { + /* + * Given that the system can accumulate a huge number + * of blkgs in pathological cases, check to see if we + * need to rescheduling to avoid softlockup. + */ spin_unlock_irq(&blkcg->lock); - cpu_relax(); + cond_resched(); spin_lock_irq(&blkcg->lock); + continue; } + + blkg_destroy(blkg); + spin_unlock(&q->queue_lock); } spin_unlock_irq(&blkcg->lock); From 0fe37724f8e70fa4cb72948f60fca553702df768 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 28 Jan 2021 15:36:19 +0900 Subject: [PATCH 07/12] block: fix bd_size_lock use Some block device drivers, e.g. the skd driver, call set_capacity() with IRQ disabled. This results in lockdep ito complain about inconsistent lock states ("inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage") because set_capacity takes a block device bd_size_lock using the functions spin_lock() and spin_unlock(). Ensure a consistent locking state by replacing these calls with spin_lock_irqsave() and spin_lock_irqrestore(). The same applies to bdev_set_nr_sectors(). With this fix, all lockdep complaints are resolved. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/genhd.c | 5 +++-- block/partitions/core.c | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 419548e92d82..9e741a4f351b 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -45,10 +45,11 @@ static void disk_release_events(struct gendisk *disk); void set_capacity(struct gendisk *disk, sector_t sectors) { struct block_device *bdev = disk->part0; + unsigned long flags; - spin_lock(&bdev->bd_size_lock); + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } EXPORT_SYMBOL(set_capacity); diff --git a/block/partitions/core.c b/block/partitions/core.c index 23460cee9de5..4601a845cd79 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -88,9 +88,11 @@ static int (*check_part[])(struct parsed_partitions *) = { static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { - spin_lock(&bdev->bd_size_lock); + unsigned long flags; + + spin_lock_irqsave(&bdev->bd_size_lock, flags); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); - spin_unlock(&bdev->bd_size_lock); + spin_unlock_irqrestore(&bdev->bd_size_lock, flags); } static struct parsed_partitions *allocate_partitions(struct gendisk *hd) From 0df28cad06eb41cc36bfea69d9c882fb567fd0d6 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 28 Jan 2021 18:48:47 +0800 Subject: [PATCH 08/12] bcache: only check feature sets when sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES For super block version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES, it doesn't make sense to check the feature sets. This patch checks super block version in bch_has_feature_* routines, if the version doesn't have feature sets yet, returns 0 (false) to the caller. Fixes: 5342fd425502 ("bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET") Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket") Cc: stable@vger.kernel.org # 5.9+ Reported-and-tested-by: Bockholdt Arne Signed-off-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/features.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h index 84fc2c0f0101..d1c8fd3977fc 100644 --- a/drivers/md/bcache/features.h +++ b/drivers/md/bcache/features.h @@ -33,6 +33,8 @@ #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_compat & \ BCH##_FEATURE_COMPAT_##flagname) != 0); \ } \ @@ -50,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_ro_compat & \ BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \ } \ @@ -67,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \ #define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \ static inline int bch_has_feature_##name(struct cache_sb *sb) \ { \ + if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \ + return 0; \ return (((sb)->feature_incompat & \ BCH##_FEATURE_INCOMPAT_##flagname) != 0); \ } \ From 899199292b14b7c735808a37517de4dd2160c300 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 25 Jan 2021 21:19:16 -0800 Subject: [PATCH 09/12] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a SPCC device This adds a quirk for SPCC 256GB NVMe 1.3 drive which fixes timeouts and I/O errors due to the fact that the controller does not properly handle the Write Zeroes command: [ 2745.659527] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 5.10.6-BET #1 [ 2745.659528] Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 3001 12/04/2020 [ 2776.138874] nvme nvme1: I/O 414 QID 3 timeout, aborting [ 2776.138886] nvme nvme1: I/O 415 QID 3 timeout, aborting [ 2776.138891] nvme nvme1: I/O 416 QID 3 timeout, aborting [ 2776.138895] nvme nvme1: I/O 417 QID 3 timeout, aborting [ 2776.138912] nvme nvme1: Abort status: 0x0 [ 2776.138921] nvme nvme1: I/O 428 QID 3 timeout, aborting [ 2776.138922] nvme nvme1: Abort status: 0x0 [ 2776.138925] nvme nvme1: Abort status: 0x0 [ 2776.138974] nvme nvme1: Abort status: 0x0 [ 2776.138977] nvme nvme1: Abort status: 0x0 [ 2806.346792] nvme nvme1: I/O 414 QID 3 timeout, reset controller [ 2806.363566] nvme nvme1: 15/0/0 default/read/poll queues [ 2836.554298] nvme nvme1: I/O 415 QID 3 timeout, disable controller [ 2836.672064] blk_update_request: I/O error, dev nvme1n1, sector 16350 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672072] blk_update_request: I/O error, dev nvme1n1, sector 16093 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672074] blk_update_request: I/O error, dev nvme1n1, sector 15836 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672076] blk_update_request: I/O error, dev nvme1n1, sector 15579 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672078] blk_update_request: I/O error, dev nvme1n1, sector 15322 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672080] blk_update_request: I/O error, dev nvme1n1, sector 15065 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672082] blk_update_request: I/O error, dev nvme1n1, sector 14808 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672083] blk_update_request: I/O error, dev nvme1n1, sector 14551 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672085] blk_update_request: I/O error, dev nvme1n1, sector 14294 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672087] blk_update_request: I/O error, dev nvme1n1, sector 14037 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0 [ 2836.672121] nvme nvme1: failed to mark controller live state [ 2836.672123] nvme nvme1: Removing after probe failure status: -19 [ 2836.689016] Aborting journal on device dm-0-8. [ 2836.689024] Buffer I/O error on dev dm-0, logical block 25198592, lost sync page write [ 2836.689027] JBD2: Error -5 detected when updating journal superblock for dm-0-8. Reported-by: Bradley Chapman Signed-off-by: Chaitanya Kulkarni Tested-by: Bradley Chapman Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 856aa31931c1..81e6389b2042 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3257,6 +3257,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ + .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), .driver_data = NVME_QUIRK_SINGLE_VECTOR }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) }, From d1bcf006a9d3d63c1bcb65a993cb13756954cd9c Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 27 Jan 2021 11:30:33 +0100 Subject: [PATCH 10/12] nvme-multipath: Early exit if no path is available nvme_round_robin_path() should test if the return ns pointer is valid. nvme_next_ns() will return a NULL pointer if there is no path left. Fixes: 75c10e732724 ("nvme-multipath: round-robin I/O policy") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 9ac762b28811..282b7a4ea9a9 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -221,7 +221,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head, } for (ns = nvme_next_ns(head, old); - ns != old; + ns && ns != old; ns = nvme_next_ns(head, ns)) { if (nvme_path_is_disabled(ns)) continue; From 772ea326a4a00b6b4b2c8f3606ad10c31f46c511 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Thu, 28 Jan 2021 11:33:51 +0800 Subject: [PATCH 11/12] nvme-core: use list_add_tail_rcu instead of list_add_tail for nvme_init_ns_head The "list" of nvme_ns_head is used as rcu list, now in nvme_init_ns_head list_add_tail is used to add ns->siblings to the rcu list. It is not safe. Should use list_add_tail_rcu instead of list_add_tail. Signed-off-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8caf9b34734d..f13eb4ded95f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3829,7 +3829,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } } - list_add_tail(&ns->siblings, &head->list); + list_add_tail_rcu(&ns->siblings, &head->list); ns->head = head; mutex_unlock(&ctrl->subsys->lock); return 0; From cd92cdb9c8bcfc27a8f28bcbf7c414a0ea79e5ec Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 29 Jan 2021 23:47:25 +0900 Subject: [PATCH 12/12] null_blk: cleanup zoned mode initialization To avoid potential compilation problems, replaced the badly written MB_TO_SECTS() macro (missing parenthesis around the argument use) with the inline function mb_to_sects(). And while at it, simplify the calculation of the total number of zones of the device using the round_up() macro. Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- drivers/block/null_blk/zoned.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 148b871f263b..fce0a54df0e5 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -6,7 +6,10 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) +static inline sector_t mb_to_sects(unsigned long mb) +{ + return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT; +} static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { @@ -77,12 +80,11 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } - zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity); - dev_capacity_sects = MB_TO_SECTS(dev->size); - dev->zone_size_sects = MB_TO_SECTS(dev->zone_size); - dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects); - if (dev_capacity_sects & (dev->zone_size_sects - 1)) - dev->nr_zones++; + zone_capacity_sects = mb_to_sects(dev->zone_capacity); + dev_capacity_sects = mb_to_sects(dev->size); + dev->zone_size_sects = mb_to_sects(dev->zone_size); + dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects) + >> ilog2(dev->zone_size_sects); dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone), GFP_KERNEL | __GFP_ZERO);