From 2569063c7140c65a0d0ad075e95ddfbcda9ba3c0 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sun, 27 Dec 2020 19:34:58 +0800
Subject: [PATCH 01/12] blk-mq: test QUEUE_FLAG_HCTX_ACTIVE for sbitmap_shared
 in hctx_may_queue

In case of blk_mq_is_sbitmap_shared(), we should test QUEUE_FLAG_HCTX_ACTIVE against
q->queue_flags instead of BLK_MQ_S_TAG_ACTIVE.

So fix it.

Cc: John Garry <john.garry@huawei.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Fixes: f1b49fdc1c64 ("blk-mq: Record active_queues_shared_sbitmap per tag_set for when using shared sbitmap")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-mq.h b/block/blk-mq.h
index c1458d9502f1..3616453ca28c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -304,7 +304,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 		struct request_queue *q = hctx->queue;
 		struct blk_mq_tag_set *set = q->tag_set;
 
-		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &q->queue_flags))
+		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
 		users = atomic_read(&set->active_queues_shared_sbitmap);
 	} else {

From ef49d40b61a3e18a11edd5eb1c30b0183af9e850 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sun, 17 Jan 2021 16:50:17 +0800
Subject: [PATCH 02/12] block: Fix an error handling in add_partition

Once we have called device_initialize(), we should use put_device() to
give up the reference on error, just like what we have done on failure
of device_add().

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partitions/core.c b/block/partitions/core.c
index e7d776db803b..23460cee9de5 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -384,7 +384,7 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 
 	err = blk_alloc_devt(bdev, &devt);
 	if (err)
-		goto out_bdput;
+		goto out_put;
 	pdev->devt = devt;
 
 	/* delay uevent until 'holders' subdir is created */

From ac55ad2b5fadb6af8826963d7d3331c9950a2608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=B6ppner?= <hoeppner@linux.ibm.com>
Date: Mon, 18 Jan 2021 17:55:18 +0100
Subject: [PATCH 03/12] s390/dasd: Fix inconsistent kobject removal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Our intention was to only remove path kobjects whenever a device is
being set offline. However, one corner case was missing.

If a device is disabled and enabled (using the IOCTLs BIODASDDISABLE and
BIODASDENABLE respectively), the enabling process will call
dasd_eckd_reload_device() which itself calls dasd_eckd_read_conf() in
order to update path information. During that update,
dasd_eckd_clear_conf_data() clears all old data and also removes all
kobjects. This will leave us with an inconsistent state of path kobjects
and a subsequent path verification leads to a failing kobject creation.

Fix this by removing kobjects only in the context of offlining a device
as initially intended.

Fixes: 19508b204740 ("s390/dasd: Display FC Endpoint Security information via sysfs")
Reported-by: Stefan Haberland <sth@linux.ibm.com>
Signed-off-by: Jan Höppner <hoeppner@linux.ibm.com>
Reviewed-by: Stefan Haberland <sth@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/dasd_devmap.c | 20 ++++++++++++++------
 drivers/s390/block/dasd_eckd.c   |  3 ++-
 drivers/s390/block/dasd_int.h    |  2 +-
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 16bb135c20aa..03d27ee9cac6 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -1874,18 +1874,26 @@ void dasd_path_create_kobjects(struct dasd_device *device)
 }
 EXPORT_SYMBOL(dasd_path_create_kobjects);
 
-/*
- * As we keep kobjects for the lifetime of a device, this function must not be
- * called anywhere but in the context of offlining a device.
- */
-void dasd_path_remove_kobj(struct dasd_device *device, int chp)
+static void dasd_path_remove_kobj(struct dasd_device *device, int chp)
 {
 	if (device->path[chp].in_sysfs) {
 		kobject_put(&device->path[chp].kobj);
 		device->path[chp].in_sysfs = false;
 	}
 }
-EXPORT_SYMBOL(dasd_path_remove_kobj);
+
+/*
+ * As we keep kobjects for the lifetime of a device, this function must not be
+ * called anywhere but in the context of offlining a device.
+ */
+void dasd_path_remove_kobjects(struct dasd_device *device)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		dasd_path_remove_kobj(device, i);
+}
+EXPORT_SYMBOL(dasd_path_remove_kobjects);
 
 int dasd_add_sysfs_files(struct ccw_device *cdev)
 {
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 3caa1ee5f4b0..65eb87cbbb9b 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1036,7 +1036,6 @@ static void dasd_eckd_clear_conf_data(struct dasd_device *device)
 		device->path[i].ssid = 0;
 		device->path[i].chpid = 0;
 		dasd_path_notoper(device, i);
-		dasd_path_remove_kobj(device, i);
 	}
 }
 
@@ -2173,6 +2172,7 @@ out_err2:
 	device->block = NULL;
 out_err1:
 	dasd_eckd_clear_conf_data(device);
+	dasd_path_remove_kobjects(device);
 	kfree(device->private);
 	device->private = NULL;
 	return rc;
@@ -2191,6 +2191,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device)
 	private->vdsneq = NULL;
 	private->gneq = NULL;
 	dasd_eckd_clear_conf_data(device);
+	dasd_path_remove_kobjects(device);
 }
 
 static struct dasd_ccw_req *
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 3bc008f9136c..b8a04c42d1d2 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -858,7 +858,7 @@ int dasd_add_sysfs_files(struct ccw_device *);
 void dasd_remove_sysfs_files(struct ccw_device *);
 void dasd_path_create_kobj(struct dasd_device *, int);
 void dasd_path_create_kobjects(struct dasd_device *);
-void dasd_path_remove_kobj(struct dasd_device *, int);
+void dasd_path_remove_kobjects(struct dasd_device *);
 
 struct dasd_device *dasd_device_from_cdev(struct ccw_device *);
 struct dasd_device *dasd_device_from_cdev_locked(struct ccw_device *);

From b98e762e3d71e893b221f871825dc64694cfb258 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Mon, 25 Jan 2021 12:21:02 -0500
Subject: [PATCH 04/12] nbd: freeze the queue while we're adding connections

When setting up a device, we can krealloc the config->socks array to add
new sockets to the configuration.  However if we happen to get a IO
request in at this point even though we aren't setup we could hit a UAF,
as we deref config->socks without any locking, assuming that the
configuration was setup already and that ->socks is safe to access it as
we have a reference on the configuration.

But there's nothing really preventing IO from occurring at this point of
the device setup, we don't want to incur the overhead of a lock to
access ->socks when it will never change while the device is running.
To fix this UAF scenario simply freeze the queue if we are adding
sockets.  This will protect us from this particular case without adding
any additional overhead for the normal running case.

Cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 6727358e147d..e6ea5d344f87 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1022,6 +1022,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	if (!sock)
 		return err;
 
+	/*
+	 * We need to make sure we don't get any errant requests while we're
+	 * reallocating the ->socks array.
+	 */
+	blk_mq_freeze_queue(nbd->disk->queue);
+
 	if (!netlink && !nbd->task_setup &&
 	    !test_bit(NBD_RT_BOUND, &config->runtime_flags))
 		nbd->task_setup = current;
@@ -1060,10 +1066,12 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
 	nsock->cookie = 0;
 	socks[config->num_connections++] = nsock;
 	atomic_inc(&config->live_connections);
+	blk_mq_unfreeze_queue(nbd->disk->queue);
 
 	return 0;
 
 put_socket:
+	blk_mq_unfreeze_queue(nbd->disk->queue);
 	sockfd_put(sock);
 	return err;
 }

From 8dc932d3e8afb65e12eba7495f046c83884c49bf Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maxtram95@gmail.com>
Date: Tue, 26 Jan 2021 21:59:07 +0200
Subject: [PATCH 05/12] Revert "block: simplify set_init_blocksize" to regain
 lost performance

The cited commit introduced a serious regression with SATA write speed,
as found by bisecting. This patch reverts this commit, which restores
write speed back to the values observed before this commit.

The performance tests were done on a Helios4 NAS (2nd batch) with 4 HDDs
(WD8003FFBX) using dd (bs=1M count=2000). "Direct" is a test with a
single HDD, the rest are different RAID levels built over the first
partitions of 4 HDDs. Test results are in MB/s, R is read, W is write.

                | Direct | RAID0 | RAID10 f2 | RAID10 n2 | RAID6
----------------+--------+-------+-----------+-----------+--------
9011495c9466    | R:256  | R:313 | R:276     | R:313     | R:323
(before faulty) | W:254  | W:253 | W:195     | W:204     | W:117
----------------+--------+-------+-----------+-----------+--------
5ff9f19231a0    | R:257  | R:398 | R:312     | R:344     | R:391
(faulty commit) | W:154  | W:122 | W:67.7    | W:66.6    | W:67.2
----------------+--------+-------+-----------+-----------+--------
5.10.10         | R:256  | R:401 | R:312     | R:356     | R:375
unpatched       | W:149  | W:123 | W:64      | W:64.1    | W:61.5
----------------+--------+-------+-----------+-----------+--------
5.10.10         | R:255  | R:396 | R:312     | R:340     | R:393
patched         | W:247  | W:274 | W:220     | W:225     | W:121

Applying this patch doesn't hurt read performance, while improves the
write speed by 1.5x - 3.5x (more impact on RAID tests). The write speed
is restored back to the state before the faulty commit, and even a bit
higher in RAID tests (which aren't HDD-bound on this device) - that is
likely related to other optimizations done between the faulty commit and
5.10.10 which also improved the read speed.

Signed-off-by: Maxim Mikityanskiy <maxtram95@gmail.com>
Fixes: 5ff9f19231a0 ("block: simplify set_init_blocksize")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3b8963e228a1..235b5042672e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -130,7 +130,15 @@ EXPORT_SYMBOL(truncate_bdev_range);
 
 static void set_init_blocksize(struct block_device *bdev)
 {
-	bdev->bd_inode->i_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+	unsigned int bsize = bdev_logical_block_size(bdev);
+	loff_t size = i_size_read(bdev->bd_inode);
+
+	while (bsize < PAGE_SIZE) {
+		if (size & bsize)
+			break;
+		bsize <<= 1;
+	}
+	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
 }
 
 int set_blocksize(struct block_device *bdev, int size)

From 6c635caef410aa757befbd8857c1eadde5cc22ed Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Thu, 28 Jan 2021 13:58:15 +0800
Subject: [PATCH 06/12] blk-cgroup: Use cond_resched() when destroy blkgs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On !PREEMPT kernel, we can get below softlockup when doing stress
testing with creating and destroying block cgroup repeatly. The
reason is it may take a long time to acquire the queue's lock in
the loop of blkcg_destroy_blkgs(), or the system can accumulate a
huge number of blkgs in pathological cases. We can add a need_resched()
check on each loop and release locks and do cond_resched() if true
to avoid this issue, since the blkcg_destroy_blkgs() is not called
from atomic contexts.

[ 4757.010308] watchdog: BUG: soft lockup - CPU#11 stuck for 94s!
[ 4757.010698] Call trace:
[ 4757.010700]  blkcg_destroy_blkgs+0x68/0x150
[ 4757.010701]  cgwb_release_workfn+0x104/0x158
[ 4757.010702]  process_one_work+0x1bc/0x3f0
[ 4757.010704]  worker_thread+0x164/0x468
[ 4757.010705]  kthread+0x108/0x138

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 031114d454a6..4221a1539391 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1016,6 +1016,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
  */
 void blkcg_destroy_blkgs(struct blkcg *blkcg)
 {
+	might_sleep();
+
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1023,14 +1025,20 @@ void blkcg_destroy_blkgs(struct blkcg *blkcg)
 						struct blkcg_gq, blkcg_node);
 		struct request_queue *q = blkg->q;
 
-		if (spin_trylock(&q->queue_lock)) {
-			blkg_destroy(blkg);
-			spin_unlock(&q->queue_lock);
-		} else {
+		if (need_resched() || !spin_trylock(&q->queue_lock)) {
+			/*
+			 * Given that the system can accumulate a huge number
+			 * of blkgs in pathological cases, check to see if we
+			 * need to rescheduling to avoid softlockup.
+			 */
 			spin_unlock_irq(&blkcg->lock);
-			cpu_relax();
+			cond_resched();
 			spin_lock_irq(&blkcg->lock);
+			continue;
 		}
+
+		blkg_destroy(blkg);
+		spin_unlock(&q->queue_lock);
 	}
 
 	spin_unlock_irq(&blkcg->lock);

From 0fe37724f8e70fa4cb72948f60fca553702df768 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 28 Jan 2021 15:36:19 +0900
Subject: [PATCH 07/12] block: fix bd_size_lock use

Some block device drivers, e.g. the skd driver, call set_capacity() with
IRQ disabled. This results in lockdep ito complain about inconsistent
lock states ("inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage")
because set_capacity takes a block device bd_size_lock using the
functions spin_lock() and spin_unlock(). Ensure a consistent locking
state by replacing these calls with spin_lock_irqsave() and
spin_lock_irqrestore(). The same applies to bdev_set_nr_sectors().
With this fix, all lockdep complaints are resolved.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c           | 5 +++--
 block/partitions/core.c | 6 ++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 419548e92d82..9e741a4f351b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -45,10 +45,11 @@ static void disk_release_events(struct gendisk *disk);
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
 	struct block_device *bdev = disk->part0;
+	unsigned long flags;
 
-	spin_lock(&bdev->bd_size_lock);
+	spin_lock_irqsave(&bdev->bd_size_lock, flags);
 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-	spin_unlock(&bdev->bd_size_lock);
+	spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
 }
 EXPORT_SYMBOL(set_capacity);
 
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 23460cee9de5..4601a845cd79 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -88,9 +88,11 @@ static int (*check_part[])(struct parsed_partitions *) = {
 
 static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
-	spin_lock(&bdev->bd_size_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bdev->bd_size_lock, flags);
 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-	spin_unlock(&bdev->bd_size_lock);
+	spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
 }
 
 static struct parsed_partitions *allocate_partitions(struct gendisk *hd)

From 0df28cad06eb41cc36bfea69d9c882fb567fd0d6 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Thu, 28 Jan 2021 18:48:47 +0800
Subject: [PATCH 08/12] bcache: only check feature sets when sb->version >=
 BCACHE_SB_VERSION_CDEV_WITH_FEATURES

For super block version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES, it
doesn't make sense to check the feature sets. This patch checks
super block version in bch_has_feature_* routines, if the version
doesn't have feature sets yet, returns 0 (false) to the caller.

Fixes: 5342fd425502 ("bcache: set bcache device into read-only mode for BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET")
Fixes: ffa470327572 ("bcache: add bucket_size_hi into struct cache_sb_disk for large bucket")
Cc: stable@vger.kernel.org # 5.9+
Reported-and-tested-by: Bockholdt Arne <a.bockholdt@precitec-optronik.de>
Signed-off-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/features.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index 84fc2c0f0101..d1c8fd3977fc 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -33,6 +33,8 @@
 #define BCH_FEATURE_COMPAT_FUNCS(name, flagname) \
 static inline int bch_has_feature_##name(struct cache_sb *sb) \
 { \
+	if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+		return 0; \
 	return (((sb)->feature_compat & \
 		BCH##_FEATURE_COMPAT_##flagname) != 0); \
 } \
@@ -50,6 +52,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
 #define BCH_FEATURE_RO_COMPAT_FUNCS(name, flagname) \
 static inline int bch_has_feature_##name(struct cache_sb *sb) \
 { \
+	if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+		return 0; \
 	return (((sb)->feature_ro_compat & \
 		BCH##_FEATURE_RO_COMPAT_##flagname) != 0); \
 } \
@@ -67,6 +71,8 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
 #define BCH_FEATURE_INCOMPAT_FUNCS(name, flagname) \
 static inline int bch_has_feature_##name(struct cache_sb *sb) \
 { \
+	if (sb->version < BCACHE_SB_VERSION_CDEV_WITH_FEATURES) \
+		return 0; \
 	return (((sb)->feature_incompat & \
 		BCH##_FEATURE_INCOMPAT_##flagname) != 0); \
 } \

From 899199292b14b7c735808a37517de4dd2160c300 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 25 Jan 2021 21:19:16 -0800
Subject: [PATCH 09/12] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a SPCC
 device

This adds a quirk for SPCC 256GB NVMe 1.3 drive which fixes timeouts and
I/O errors due to the fact that the controller does not properly
handle the Write Zeroes command:

[ 2745.659527] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G            E 5.10.6-BET #1
[ 2745.659528] Hardware name: System manufacturer System Product Name/PRIME X570-P, BIOS 3001 12/04/2020
[ 2776.138874] nvme nvme1: I/O 414 QID 3 timeout, aborting
[ 2776.138886] nvme nvme1: I/O 415 QID 3 timeout, aborting
[ 2776.138891] nvme nvme1: I/O 416 QID 3 timeout, aborting
[ 2776.138895] nvme nvme1: I/O 417 QID 3 timeout, aborting
[ 2776.138912] nvme nvme1: Abort status: 0x0
[ 2776.138921] nvme nvme1: I/O 428 QID 3 timeout, aborting
[ 2776.138922] nvme nvme1: Abort status: 0x0
[ 2776.138925] nvme nvme1: Abort status: 0x0
[ 2776.138974] nvme nvme1: Abort status: 0x0
[ 2776.138977] nvme nvme1: Abort status: 0x0
[ 2806.346792] nvme nvme1: I/O 414 QID 3 timeout, reset controller
[ 2806.363566] nvme nvme1: 15/0/0 default/read/poll queues
[ 2836.554298] nvme nvme1: I/O 415 QID 3 timeout, disable controller
[ 2836.672064] blk_update_request: I/O error, dev nvme1n1, sector 16350 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672072] blk_update_request: I/O error, dev nvme1n1, sector 16093 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672074] blk_update_request: I/O error, dev nvme1n1, sector 15836 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672076] blk_update_request: I/O error, dev nvme1n1, sector 15579 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672078] blk_update_request: I/O error, dev nvme1n1, sector 15322 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672080] blk_update_request: I/O error, dev nvme1n1, sector 15065 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672082] blk_update_request: I/O error, dev nvme1n1, sector 14808 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672083] blk_update_request: I/O error, dev nvme1n1, sector 14551 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672085] blk_update_request: I/O error, dev nvme1n1, sector 14294 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672087] blk_update_request: I/O error, dev nvme1n1, sector 14037 op 0x9:(WRITE_ZEROES) flags 0x0 phys_seg 0 prio class 0
[ 2836.672121] nvme nvme1: failed to mark controller live state
[ 2836.672123] nvme nvme1: Removing after probe failure status: -19
[ 2836.689016] Aborting journal on device dm-0-8.
[ 2836.689024] Buffer I/O error on dev dm-0, logical block 25198592, lost sync page write
[ 2836.689027] JBD2: Error -5 detected when updating journal superblock for dm-0-8.

Reported-by: Bradley Chapman <chapman6235@comcast.net>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Tested-by: Bradley Chapman <chapman6235@comcast.net>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 856aa31931c1..81e6389b2042 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3257,6 +3257,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x15b7, 0x2001),   /*  Sandisk Skyhawk */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+	{ PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
+		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
 		.driver_data = NVME_QUIRK_SINGLE_VECTOR },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },

From d1bcf006a9d3d63c1bcb65a993cb13756954cd9c Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Wed, 27 Jan 2021 11:30:33 +0100
Subject: [PATCH 10/12] nvme-multipath: Early exit if no path is available

nvme_round_robin_path() should test if the return ns pointer is valid.
nvme_next_ns() will return a NULL pointer if there is no path left.

Fixes: 75c10e732724 ("nvme-multipath: round-robin I/O policy")
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 9ac762b28811..282b7a4ea9a9 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -221,7 +221,7 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
 	}
 
 	for (ns = nvme_next_ns(head, old);
-	     ns != old;
+	     ns && ns != old;
 	     ns = nvme_next_ns(head, ns)) {
 		if (nvme_path_is_disabled(ns))
 			continue;

From 772ea326a4a00b6b4b2c8f3606ad10c31f46c511 Mon Sep 17 00:00:00 2001
From: Chao Leng <lengchao@huawei.com>
Date: Thu, 28 Jan 2021 11:33:51 +0800
Subject: [PATCH 11/12] nvme-core: use list_add_tail_rcu instead of
 list_add_tail for nvme_init_ns_head

The "list" of nvme_ns_head is used as rcu list, now in nvme_init_ns_head
list_add_tail is used to add ns->siblings to the rcu list. It is not safe.
Should use list_add_tail_rcu instead of list_add_tail.

Signed-off-by: Chao Leng <lengchao@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8caf9b34734d..f13eb4ded95f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3829,7 +3829,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 		}
 	}
 
-	list_add_tail(&ns->siblings, &head->list);
+	list_add_tail_rcu(&ns->siblings, &head->list);
 	ns->head = head;
 	mutex_unlock(&ctrl->subsys->lock);
 	return 0;

From cd92cdb9c8bcfc27a8f28bcbf7c414a0ea79e5ec Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 29 Jan 2021 23:47:25 +0900
Subject: [PATCH 12/12] null_blk: cleanup zoned mode initialization

To avoid potential compilation problems, replaced the badly written
MB_TO_SECTS() macro (missing parenthesis around the argument use) with
the inline function mb_to_sects(). And while at it, simplify the
calculation of the total number of zones of the device using the
round_up() macro.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/zoned.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 148b871f263b..fce0a54df0e5 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -6,7 +6,10 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT)
+static inline sector_t mb_to_sects(unsigned long mb)
+{
+	return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
+}
 
 static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
 {
@@ -77,12 +80,11 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
 		return -EINVAL;
 	}
 
-	zone_capacity_sects = MB_TO_SECTS(dev->zone_capacity);
-	dev_capacity_sects = MB_TO_SECTS(dev->size);
-	dev->zone_size_sects = MB_TO_SECTS(dev->zone_size);
-	dev->nr_zones = dev_capacity_sects >> ilog2(dev->zone_size_sects);
-	if (dev_capacity_sects & (dev->zone_size_sects - 1))
-		dev->nr_zones++;
+	zone_capacity_sects = mb_to_sects(dev->zone_capacity);
+	dev_capacity_sects = mb_to_sects(dev->size);
+	dev->zone_size_sects = mb_to_sects(dev->zone_size);
+	dev->nr_zones = round_up(dev_capacity_sects, dev->zone_size_sects)
+		>> ilog2(dev->zone_size_sects);
 
 	dev->zones = kvmalloc_array(dev->nr_zones, sizeof(struct nullb_zone),
 				    GFP_KERNEL | __GFP_ZERO);