From f34e25898a608380a60135288019c4cb6013bec8 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Mon, 29 Apr 2019 16:25:48 -0700
Subject: [PATCH 01/12] nvme-tcp: fix possible null deref on a timed out io queue connect

If I/O queue connect times out, we might have freed the queue socket
already, so check for that on the error path in nvme_tcp_start_queue.

Signed-off-by: Sagi Grimberg
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2405bb9c63cc..2b107a1d152b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1423,7 +1423,8 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 	if (!ret) {
 		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
 	} else {
-		__nvme_tcp_stop_queue(&ctrl->queues[idx]);
+		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
+			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
 		dev_err(nctrl->device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}

From 525aa5a705d86e193726ee465d1a975265fabf19 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Tue, 30 Apr 2019 18:57:09 +0200
Subject: [PATCH 02/12] nvme-multipath: split bios with the ns_head bio_set before submitting

If the bio is moved to a different queue via blk_steal_bios() and
the original queue is destroyed in nvme_remove_ns(), we'll end up
with a crash in bio_endio(), as the mempool for the split bio's
bvecs has already been destroyed.
So split the bio using the original queue (which will remain alive
for the lifetime of the bio) before sending it down to the
underlying device.

Signed-off-by: Hannes Reinecke
Reviewed-by: Ming Lei
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/multipath.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f0716f6ce41f..e6ddc83223df 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -232,6 +232,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 	blk_qc_t ret = BLK_QC_T_NONE;
 	int srcu_idx;
 
+	/*
+	 * The namespace might be going away and the bio might
+	 * be moved to a different queue via blk_steal_bios(),
+	 * so we need to use the bio_split pool from the original
+	 * queue to allocate the bvecs from.
+	 */
+	blk_queue_split(q, &bio);
+
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = nvme_find_path(head);
 	if (likely(ns)) {

From 592b6e7b0226b198c12439065f725be00c92d559 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Sun, 28 Apr 2019 20:24:42 -0700
Subject: [PATCH 03/12] nvme-multipath: don't print ANA group state by default

Signed-off-by: Hannes Reinecke
Signed-off-by: Chaitanya Kulkarni
Reviewed-by: Sagi Grimberg
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index e6ddc83223df..5c9429d41120 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -429,7 +429,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
 
-	dev_info(ctrl->device, "ANA group %d: %s.\n",
+	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 			le32_to_cpu(desc->grpid),
 			nvme_ana_state_names[desc->state]);
 

From 049bf37262c61c99f45438910711b55054b24838 Mon Sep 17 00:00:00 2001
From: Klaus Birkelund Jensen
Date: Tue, 30 Apr 2019 18:53:29 +0200
Subject: [PATCH 04/12] nvme-pci: fix psdt field for single segment sgls

The shortcut for single segment SGL requests did not set the PSDT
field to mark the request as using SGLs.

Fixes: 297910571f08 ("nvme-pci: optimize mapping single segment requests using SGLs")
Signed-off-by: Klaus Birkelund Jensen
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c1eecde6b853..efc1da56521c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -830,6 +830,7 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
 		return BLK_STS_RESOURCE;
 	iod->dma_len = bv->bv_len;
 
+	cmnd->flags = NVME_CMD_SGL_METABUF;
 	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
 	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
 	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;

From 9dc1a38ef1925d23c2933c5867df816386d92ff8 Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Tue, 30 Apr 2019 09:33:40 -0600
Subject: [PATCH 05/12] nvme-pci: shutdown on timeout during deletion

We do not restart a controller in a deleting state for timeout errors.
When in this state, unblock potential request dispatchers with failed
completions by shutting down the controller on timeout detection.

Reported-by: Yufen Yu
Signed-off-by: Keith Busch
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index efc1da56521c..3df0f2b29427 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1277,6 +1277,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
+	bool shutdown = false;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1313,12 +1314,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
+	case NVME_CTRL_DELETING:
+		shutdown = true;
 	case NVME_CTRL_CONNECTING:
 	case NVME_CTRL_RESETTING:
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_disable(dev, false);
+		nvme_dev_disable(dev, shutdown);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
 	default:

From c8e9e9b7646ebe1c5066ddc420d7630876277eb4 Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Tue, 30 Apr 2019 09:33:41 -0600
Subject: [PATCH 06/12] nvme-pci: unquiesce admin queue on shutdown

Just like IO queues, the admin queue also will not be restarted after a
controller shutdown. Unquiesce this queue so that we do not block
request dispatch on a permanently disabled controller.

Reported-by: Yufen Yu
Signed-off-by: Keith Busch
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3df0f2b29427..ac10d3ad1e75 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2437,8 +2437,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * must flush all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
-	if (shutdown)
+	if (shutdown) {
 		nvme_start_queues(&dev->ctrl);
+		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
+			blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+	}
 
 	mutex_unlock(&dev->shutdown_lock);
 }

From 665648673ef5384c7194ea6df4b55f2da98646cf Mon Sep 17 00:00:00 2001
From: Minwoo Im
Date: Fri, 12 Apr 2019 00:52:39 +0900
Subject: [PATCH 07/12] nvme-pci: remove an unneeded variable initialization

The variable "n" is only assigned when kstrtoint() succeeds; otherwise
it is never referenced, because kstrtoint() returns an error and we
bail out of this function.

Signed-off-by: Minwoo Im
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ac10d3ad1e75..e2ff92de41a7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -146,7 +146,7 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 
 static int queue_count_set(const char *val, const struct kernel_param *kp)
 {
-	int n = 0, ret;
+	int n, ret;
 
 	ret = kstrtoint(val, 10, &n);
 	if (ret)

From a97234e1ff1ec9d5a41c6adff5632c61639dee6a Mon Sep 17 00:00:00 2001
From: Minwoo Im
Date: Fri, 12 Apr 2019 00:18:32 +0900
Subject: [PATCH 08/12] nvme-pci: check more command sizes

All NVMe commands have a fixed size of 64 bytes, which is asserted
with BUILD_BUG_ON(). The remaining command structures in linux/nvme.h
also need to be checked here.

Signed-off-by: Minwoo Im
Reviewed-by: Sagi Grimberg
Reviewed-by: Chaitanya Kulkarni
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/pci.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e2ff92de41a7..9c1a8fd68b3a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -231,19 +231,26 @@ struct nvme_iod {
  */
 static inline void _nvme_check_size(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
 }
 
 static unsigned int max_io_queues(void)

From a2faf94e57c5237a9cad31f63eeaf2412ed0e951 Mon Sep 17 00:00:00 2001
From: Minwoo Im
Date: Fri, 12 Apr 2019 00:18:31 +0900
Subject: [PATCH 09/12] nvme-fabrics: check more command sizes

struct nvmf_common_command provides a common structure for the NVMe-oF
command format. It also needs to be checked for unintended size growth.

Signed-off-by: Minwoo Im
Reviewed-by: Sagi Grimberg
Reviewed-by: Chaitanya Kulkarni
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/host/fabrics.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d4cb826f58ff..592d1e61ef7e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1188,6 +1188,7 @@ static void __exit nvmf_exit(void)
 	class_destroy(nvmf_class);
 	nvmf_host_put(nvmf_default_host);
 
+	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);

From 811015409fd4af80bbecb8e46b3aa24c8986fb74 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 30 Apr 2019 11:36:52 -0400
Subject: [PATCH 10/12] nvme: move command size checks to the core

Most commands aren't PCIe specific, so move the size checks for them
to core.c.

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
Reviewed-by: Keith Busch
Reviewed-by: Chaitanya Kulkarni
---
 drivers/nvme/host/core.c | 27 +++++++++++++++++++++++++++
 drivers/nvme/host/pci.c  | 31 +++----------------------------
 2 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3dd043aa6d1f..e970c5adee28 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3879,10 +3879,37 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+/*
+ * Check we didn't inadvertently grow the command structure sizes:
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+}
+
+
 int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
 
+	_nvme_check_size();
+
 	nvme_wq = alloc_workqueue("nvme-wq",
 			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
 	if (!nvme_wq)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9c1a8fd68b3a..3e4fb891a95a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -226,33 +226,6 @@ struct nvme_iod {
 	struct scatterlist *sg;
 };
 
-/*
- * Check we didin't inadvertently grow the command struct
- */
-static inline void _nvme_check_size(void)
-{
-	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
-	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
-}
-
 static unsigned int max_io_queues(void)
 {
 	return num_possible_cpus() + write_queues + poll_queues;
@@ -2988,6 +2961,9 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
 	return pci_register_driver(&nvme_driver);
 }
@@ -2996,7 +2972,6 @@ static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
 	flush_workqueue(nvme_wq);
-	_nvme_check_size();
 }
 
 MODULE_AUTHOR("Matthew Wilcox ");

From 893a74b7a76e6e9c5c7199e6aae946f090622fa2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Tue, 30 Apr 2019 11:37:43 -0400
Subject: [PATCH 11/12] nvme: mark nvme_core_init and nvme_core_exit static

Signed-off-by: Christoph Hellwig
Reviewed-by: Bart Van Assche
Reviewed-by: Keith Busch
Reviewed-by: Chaitanya Kulkarni
---
 drivers/nvme/host/core.c | 4 ++--
 drivers/nvme/host/nvme.h | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e970c5adee28..cd16d98d1f1a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3904,7 +3904,7 @@ static inline void _nvme_check_size(void)
 }
 
 
-int __init nvme_core_init(void)
+static int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
 
@@ -3956,7 +3956,7 @@ out:
 	return result;
 }
 
-void __exit nvme_core_exit(void)
+static void __exit nvme_core_exit(void)
 {
 	ida_destroy(&nvme_subsystems_ida);
 	class_destroy(nvme_subsys_class);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 527d64545023..5ee75b5ff83f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -577,7 +577,4 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 	return dev_to_disk(dev)->private_data;
 }
 
-int __init nvme_core_init(void);
-void __exit nvme_core_exit(void);
-
 #endif /* _NVME_H */

From 6f53e73b9ec5b3cd097077c5ffcb76df708ce3f8 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg
Date: Mon, 29 Apr 2019 16:28:19 -0700
Subject: [PATCH 12/12] nvmet: protect discovery change log event list iteration

When we iterate over the discovery subsystem's controllers we need to
protect against concurrent mutations of the list.

Signed-off-by: Sagi Grimberg
Reviewed-by: Minwoo Im
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/discovery.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index e8e09266bfa5..5baf269f3f8a 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -30,14 +30,17 @@ void nvmet_port_disc_changed(struct nvmet_port *port,
 {
 	struct nvmet_ctrl *ctrl;
 
+	lockdep_assert_held(&nvmet_config_sem);
 	nvmet_genctr++;
 
+	mutex_lock(&nvmet_disc_subsys->lock);
 	list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
 		if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn))
 			continue;
 
 		__nvmet_disc_changed(port, ctrl);
 	}
+	mutex_unlock(&nvmet_disc_subsys->lock);
 }
 
 static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
@@ -46,12 +49,14 @@ static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
 {
 	struct nvmet_ctrl *ctrl;
 
+	mutex_lock(&nvmet_disc_subsys->lock);
 	list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
 		if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn))
 			continue;
 
 		__nvmet_disc_changed(port, ctrl);
 	}
+	mutex_unlock(&nvmet_disc_subsys->lock);
 }
 
 void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
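
The rule the final patch enforces is general: any walk of a shared list has to
take the same lock its writers take, otherwise entries can be added or removed
mid-iteration. Below is a minimal userspace sketch of that discipline; the
structure and function names are illustrative only and are not the kernel's
nvmet code (in the patch itself the same role is played by
nvmet_disc_subsys->lock, taken around both list_for_each_entry() walks).

/*
 * Reduced userspace sketch of the pattern in the final patch: readers and
 * writers of a shared "controller" list serialize on one mutex, so the
 * list cannot be mutated while it is being walked.  Illustrative names,
 * not kernel APIs.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct ctrl {
	int id;
	struct ctrl *next;
};

static struct ctrl *ctrl_list;		/* shared list of "controllers" */
static pthread_mutex_t ctrl_lock = PTHREAD_MUTEX_INITIALIZER;

/* Writer side: comparable to a controller joining the subsystem. */
static void *add_ctrls(void *arg)
{
	(void)arg;
	for (int id = 1; id <= 3; id++) {
		struct ctrl *c = malloc(sizeof(*c));

		c->id = id;
		pthread_mutex_lock(&ctrl_lock);
		c->next = ctrl_list;
		ctrl_list = c;
		pthread_mutex_unlock(&ctrl_lock);
	}
	return NULL;
}

/* Reader side: comparable to the discovery change-log notification walk. */
static void disc_changed(void)
{
	pthread_mutex_lock(&ctrl_lock);
	for (struct ctrl *c = ctrl_list; c; c = c->next)
		printf("notify controller %d\n", c->id);
	pthread_mutex_unlock(&ctrl_lock);
}

int main(void)
{
	pthread_t writer;

	pthread_create(&writer, NULL, add_ctrls, NULL);
	disc_changed();		/* safe even while add_ctrls() is running */
	pthread_join(writer, NULL);
	disc_changed();		/* now sees all three controllers */
	return 0;
}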