From 1c0d12c0b1a1a09fdfbc8e00c456581d04829915 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 18:04:12 -0700 Subject: [PATCH 01/18] nvme: fail cancelled commands with NVME_SC_HOST_PATH_ERROR NVME_SC_ABORT_REQ means that the request was aborted due to an abort command received. In our case, this is a transport cancellation, so host pathing error is much more appropriate. Also, convert NVME_SC_HOST_PATH_ERROR to BLK_STS_TRANSPORT for such that callers can understand that the status is a transport related error. This will be used by the ns scanning code to understand if it got an error from the controller or that the controller happens to be unreachable by the transport. Reviewed-by: Minwoo Im Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4660505eded9..066aeecca5d2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -226,6 +226,8 @@ static blk_status_t nvme_error_status(struct request *req) return BLK_STS_PROTECTION; case NVME_SC_RESERVATION_CONFLICT: return BLK_STS_NEXUS; + case NVME_SC_HOST_PATH_ERROR: + return BLK_STS_TRANSPORT; default: return BLK_STS_IOERR; } @@ -294,7 +296,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) if (blk_mq_request_completed(req)) return true; - nvme_req(req)->status = NVME_SC_ABORT_REQ; + nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; blk_mq_complete_request(req); return true; } From 16686010085f46783c895f8736e4f0ae74ae88a0 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 18:17:52 -0700 Subject: [PATCH 02/18] nvme-tcp: fail command with NVME_SC_HOST_PATH_ERROR send failed This is a more appropriate error status for a transport error detected by us (the host). Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 2d8ba31cb691..0a0263a364f2 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -842,7 +842,7 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static void nvme_tcp_fail_request(struct nvme_tcp_request *req) { - nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_DATA_XFER_ERROR); + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR); } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) From 74bd8cbe7dd64c50623cf0dc20eaeaab6b68d3d6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 6 Aug 2019 00:14:06 -0700 Subject: [PATCH 03/18] nvme-fc: Fail transport errors with NVME_SC_HOST_PATH NVME_SC_INTERNAL should indicate an internal controller errors and not host transport errors. These errors will propagate to upper layers (essentially nvme core) and be interpereted as transport errors which should not be taken into account for namespace state or condition. Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fc.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index bafe35bdffac..265f89e11d8b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1608,9 +1608,13 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (opstate == FCPOP_STATE_ABORTED) - status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); - else if (freq->status) - status = cpu_to_le16(NVME_SC_INTERNAL << 1); + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + else if (freq->status) { + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to lldd error %d\n", + ctrl->cnum, freq->status); + } /* * For the linux implementation, if we have an unsuccesful @@ -1637,8 +1641,13 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) * no payload in the CQE by the transport. */ if (freq->transferred_length != - be32_to_cpu(op->cmd_iu.data_len)) { - status = cpu_to_le16(NVME_SC_INTERNAL << 1); + be32_to_cpu(op->cmd_iu.data_len)) { + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to bad transfer " + "length: %d vs expected %d\n", + ctrl->cnum, freq->transferred_length, + be32_to_cpu(op->cmd_iu.data_len)); goto done; } result.u64 = 0; @@ -1655,7 +1664,17 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) freq->transferred_length || op->rsp_iu.status_code || sqe->common.command_id != cqe->command_id)) { - status = cpu_to_le16(NVME_SC_INTERNAL << 1); + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to bad NVMe_ERSP: " + "iu len %d, xfr len %d vs %d, status code " + "%d, cmdid %d vs %d\n", + ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len), + be32_to_cpu(op->rsp_iu.xfrd_len), + freq->transferred_length, + op->rsp_iu.status_code, + sqe->common.command_id, + cqe->command_id); goto done; } result = cqe->result; @@ -1663,7 +1682,11 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) break; default: - status = cpu_to_le16(NVME_SC_INTERNAL << 1); + status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: io failed due to odd NVMe_xRSP iu " + "len %d\n", + ctrl->cnum, freq->rcv_rsplen); goto done; } From 2f9c173647753b81eed7c198abf7622ab22dc49d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 29 Aug 2019 12:53:15 -0700 Subject: [PATCH 04/18] nvme: pass status to nvme_error_status No need for the full blown request structure. Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 066aeecca5d2..2797d38d2dca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -197,9 +197,9 @@ static inline bool nvme_ns_has_pi(struct nvme_ns *ns) return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); } -static blk_status_t nvme_error_status(struct request *req) +static blk_status_t nvme_error_status(u16 status) { - switch (nvme_req(req)->status & 0x7ff) { + switch (status & 0x7ff) { case NVME_SC_SUCCESS: return BLK_STS_OK; case NVME_SC_CAP_EXCEEDED: @@ -262,7 +262,7 @@ static void nvme_retry_req(struct request *req) void nvme_complete_rq(struct request *req) { - blk_status_t status = nvme_error_status(req); + blk_status_t status = nvme_error_status(nvme_req(req)->status); trace_nvme_complete_rq(req); From 331813f687ed41347b2b7dc784d81ccdbf6f9157 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 18:11:42 -0700 Subject: [PATCH 05/18] nvme: make nvme_identify_ns propagate errors back right now callers of nvme_identify_ns only know that it failed, but don't know why. Make nvme_identify_ns propagate the error back. Because nvme_submit_sync_cmd may return a positive status code, we make nvme_identify_ns receive the id by reference and return that status up the call chain, but make sure not to leak positive nvme status codes to the upper layers. Reviewed-by: Minwoo Im Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2797d38d2dca..52b453e4ae9c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1096,10 +1096,9 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n NVME_IDENTIFY_DATA_SIZE); } -static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, - unsigned nsid) +static int nvme_identify_ns(struct nvme_ctrl *ctrl, + unsigned nsid, struct nvme_id_ns **id) { - struct nvme_id_ns *id; struct nvme_command c = { }; int error; @@ -1108,18 +1107,17 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, c.identify.nsid = cpu_to_le32(nsid); c.identify.cns = NVME_ID_CNS_NS; - id = kmalloc(sizeof(*id), GFP_KERNEL); - if (!id) - return NULL; + *id = kmalloc(sizeof(**id), GFP_KERNEL); + if (!*id) + return -ENOMEM; - error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + error = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id)); if (error) { dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); - kfree(id); - return NULL; + kfree(*id); } - return id; + return error; } static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid, @@ -1740,13 +1738,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) return -ENODEV; } - id = nvme_identify_ns(ctrl, ns->head->ns_id); - if (!id) - return -ENODEV; + ret = nvme_identify_ns(ctrl, ns->head->ns_id, &id); + if (ret) + goto out; if (id->ncap == 0) { ret = -ENODEV; - goto out; + goto free_id; } __nvme_revalidate_disk(disk, id); @@ -1757,8 +1755,11 @@ static int nvme_revalidate_disk(struct gendisk *disk) ret = -ENODEV; } -out: +free_id: kfree(id); +out: + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -3329,11 +3330,9 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); nvme_set_queue_limits(ctrl, ns->queue); - id = nvme_identify_ns(ctrl, nsid); - if (!id) { - ret = -EIO; + ret = nvme_identify_ns(ctrl, nsid, &id); + if (ret) goto out_free_queue; - } if (id->ncap == 0) { ret = -EINVAL; @@ -3395,6 +3394,8 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) blk_cleanup_queue(ns->queue); out_free_ns: kfree(ns); + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } From 538af88ea7d9de241e6b6f006e9049c4d96723bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Aug 2019 18:16:12 -0700 Subject: [PATCH 06/18] nvme: make nvme_report_ns_ids propagate error back Make the callers check the return status and propagate back accordingly (casting to errno from a positive nvme status). Also print the return status in nvme_report_ns_ids. Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 52b453e4ae9c..f15a77dd3115 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1595,9 +1595,11 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); } -static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, +static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, struct nvme_id_ns *id, struct nvme_ns_ids *ids) { + int ret = 0; + memset(ids, 0, sizeof(*ids)); if (ctrl->vs >= NVME_VS(1, 1, 0)) @@ -1608,10 +1610,12 @@ static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if (nvme_identify_ns_descs(ctrl, nsid, ids)) + ret = nvme_identify_ns_descs(ctrl, nsid, ids); + if (ret) dev_warn(ctrl->device, - "%s: Identify Descriptors failed\n", __func__); + "Identify Descriptors failed (%d)\n", ret); } + return ret; } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -1748,7 +1752,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) } __nvme_revalidate_disk(disk, id); - nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); + ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); + if (ret) + goto free_id; + if (!nvme_ns_ids_equal(&ns->head->ids, &ids)) { dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); @@ -3176,7 +3183,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, head->ns_id = nsid; kref_init(&head->ref); - nvme_report_ns_ids(ctrl, nsid, id, &head->ids); + ret = nvme_report_ns_ids(ctrl, nsid, id, &head->ids); + if (ret) + goto out_cleanup_srcu; ret = __nvme_check_ids(ctrl->subsys, head); if (ret) { @@ -3201,6 +3210,8 @@ out_ida_remove: out_free_head: kfree(head); out: + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ERR_PTR(ret); } @@ -3224,7 +3235,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } else { struct nvme_ns_ids ids; - nvme_report_ns_ids(ctrl, nsid, id, &ids); + ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); + if (ret) + goto out_unlock; + if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", @@ -3239,6 +3253,8 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, out_unlock: mutex_unlock(&ctrl->subsys->lock); + if (ret > 0) + ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } From 205da24343013e0bd62475800df79cd053f22326 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 30 Aug 2019 11:00:59 -0700 Subject: [PATCH 07/18] nvme: fix ns removal hang when failing to revalidate due to a transient error If a controller reset is racing with a namespace revalidation, the revalidation (admin) I/O will surely fail, but we should not remove the namespace as we will execute the I/O when the controller is back up. Same for spurious allocation errors (return -ENOMEM). Fix this by checking the specific error code in nvme_revalidate_disk and if it is a transient error (for example non DNR nvme statuses or a negative ENOMEM as allocation failure), do not remove the namespace as it will either recover when the controller is back up and schedule a subsequent scan, or the controller is going away and the namespaces will be removed anyways. This fixes a hang namespace scanning racing with a controller reset and also sporious I/O errors in path failover coditions where the controller reset is racing with the namespace scan work with multipath enabled. Reported-by: Hannes Reinecke Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f15a77dd3115..fad04282148d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1765,7 +1765,13 @@ static int nvme_revalidate_disk(struct gendisk *disk) free_id: kfree(id); out: - if (ret > 0) + /* + * Only fail the function if we got a fatal error back from the + * device, otherwise ignore the error and just move on. + */ + if (ret == -ENOMEM || (ret > 0 && !(ret & NVME_SC_DNR))) + ret = 0; + else if (ret > 0) ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } From c26aa572027d438de9cc311aaebcbe972f698c24 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 3 Sep 2019 14:20:37 -0700 Subject: [PATCH 08/18] nvme: Treat discovery subsystems as unique subsystems Current code matches subnqn and collapses all controllers to the same subnqn to a single subsystem structure. This is good for recognizing multiple controllers for the same subsystem. But with the well-known discovery subnqn, the subsystems aren't truly the same subsystem. As such, subsystem specific rules, such as no overlap of controller id, do not apply. With today's behavior, the check for overlap of controller id can fail, preventing the new discovery controller from being created. When searching for like subsystem nqn, exclude the discovery nqn from matching. This will result in each discovery controller being attached to a unique subsystem structure. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index fad04282148d..0545eb97d838 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2374,6 +2374,17 @@ static struct nvme_subsystem *__nvme_find_get_subsystem(const char *subsysnqn) lockdep_assert_held(&nvme_subsystems_lock); + /* + * Fail matches for discovery subsystems. This results + * in each discovery controller bound to a unique subsystem. + * This avoids issues with validating controller values + * that can only be true when there is a single unique subsystem. + * There may be multiple and completely independent entities + * that provide discovery controllers. + */ + if (!strcmp(subsysnqn, NVME_DISC_SUBSYS_NAME)) + return NULL; + list_for_each_entry(subsys, &nvme_subsystems, entry) { if (strcmp(subsys->subnqn, subsysnqn)) continue; From 03894b7a896dc6eb3870e197bd7414ab0c947cbf Mon Sep 17 00:00:00 2001 From: Edmund Nadolski Date: Tue, 3 Sep 2019 14:08:47 -0600 Subject: [PATCH 09/18] nvme: include admin_q sync with nvme_sync_queues nvme_sync_queues currently syncs all namespace queues, but should also sync the admin queue, if present. Signed-off-by: Edmund Nadolski Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0545eb97d838..1777c8e6dffd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4010,6 +4010,9 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); + + if (ctrl->admin_q) + blk_sync_queue(ctrl->admin_q); } EXPORT_SYMBOL_GPL(nvme_sync_queues); From 312910f4d2fed987d1f4a6cd75e86c926e9ad557 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 5 Sep 2019 15:34:35 +0100 Subject: [PATCH 10/18] nvme: tcp: remove redundant assignment to variable ret The variable ret is being initialized with a value that is never read and is being re-assigned immediately afterwards. The assignment is redundant and hence can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 0a0263a364f2..4ffd5957637a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1824,7 +1824,7 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) { struct nvmf_ctrl_options *opts = ctrl->opts; - int ret = -EINVAL; + int ret; ret = nvme_tcp_configure_admin_queue(ctrl, new); if (ret) From 733e4b69d508d03c20adfdcf4bd27abc60fae9cc Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 5 Sep 2019 10:33:54 -0600 Subject: [PATCH 11/18] nvme: Assign subsys instance from first ctrl The namespace disk names must be unique for the lifetime of the subsystem. This was accomplished by using their parent subsystems' instances which were allocated independently from the controllers connected to that subsystem. This allowed name prefixes assigned to namespaces to match a controller from an unrelated subsystem, and has created confusion among users examining device nodes. Ensure a namespace's subsystem instance never clashes with a controller instance of another subsystem by transferring the instance ownership to the parent subsystem from the first controller discovered in that subsystem. Reviewed-by: Logan Gunthorpe Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Minwoo Im Reviewed-by: Hannes Reinecke Reviewed-off-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1777c8e6dffd..55fc0728764e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -81,7 +81,6 @@ EXPORT_SYMBOL_GPL(nvme_reset_wq); struct workqueue_struct *nvme_delete_wq; EXPORT_SYMBOL_GPL(nvme_delete_wq); -static DEFINE_IDA(nvme_subsystems_ida); static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); @@ -2345,7 +2344,8 @@ static void nvme_release_subsystem(struct device *dev) struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - ida_simple_remove(&nvme_subsystems_ida, subsys->instance); + if (subsys->instance >= 0) + ida_simple_remove(&nvme_instance_ida, subsys->instance); kfree(subsys); } @@ -2485,12 +2485,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); if (!subsys) return -ENOMEM; - ret = ida_simple_get(&nvme_subsystems_ida, 0, 0, GFP_KERNEL); - if (ret < 0) { - kfree(subsys); - return ret; - } - subsys->instance = ret; + + subsys->instance = -1; mutex_init(&subsys->lock); kref_init(&subsys->ref); INIT_LIST_HEAD(&subsys->ctrls); @@ -2509,7 +2505,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) subsys->dev.class = nvme_subsys_class; subsys->dev.release = nvme_release_subsystem; subsys->dev.groups = nvme_subsys_attrs_groups; - dev_set_name(&subsys->dev, "nvme-subsys%d", subsys->instance); + dev_set_name(&subsys->dev, "nvme-subsys%d", ctrl->instance); device_initialize(&subsys->dev); mutex_lock(&nvme_subsystems_lock); @@ -2540,6 +2536,8 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) goto out_put_subsystem; } + if (!found) + subsys->instance = ctrl->instance; ctrl->subsys = subsys; list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); mutex_unlock(&nvme_subsystems_lock); @@ -3810,7 +3808,9 @@ static void nvme_free_ctrl(struct device *dev) container_of(dev, struct nvme_ctrl, ctrl_device); struct nvme_subsystem *subsys = ctrl->subsys; - ida_simple_remove(&nvme_instance_ida, ctrl->instance); + if (subsys && ctrl->instance != subsys->instance) + ida_simple_remove(&nvme_instance_ida, ctrl->instance); + kfree(ctrl->effects); nvme_mpath_uninit(ctrl); __free_page(ctrl->discard_page); @@ -4095,7 +4095,6 @@ out: static void __exit nvme_core_exit(void) { - ida_destroy(&nvme_subsystems_ida); class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); From 97b3807e93036819cabd803490c2bc6e2e58167c Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 5 Sep 2019 18:41:06 +0300 Subject: [PATCH 12/18] nvme: Remove redundant assignment of cq vector The cq vector is already assigned with the correct value. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Keith Busch Reviewed-off-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5c3732fd02bc..52205f8d90b4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1555,7 +1555,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) nvme_init_queue(nvmeq, qid); if (!polled) { - nvmeq->cq_vector = vector; result = queue_request_irq(nvmeq); if (result < 0) goto release_sq; From 1179d337be70e67baa2d8121677c310fea4f72c3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 6 Sep 2019 19:50:19 +0200 Subject: [PATCH 13/18] nvmet: Use PTR_ERR_OR_ZERO() in nvmet_init_discovery() Simplify this function implementation by using a known function. Generated by: scripts/coccinelle/api/ptr_ret.cocci Signed-off-by: Markus Elfring Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg --- drivers/nvme/target/discovery.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 8efca26b4776..3764a8900850 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -381,9 +381,7 @@ int __init nvmet_init_discovery(void) { nvmet_disc_subsys = nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); - if (IS_ERR(nvmet_disc_subsys)) - return PTR_ERR(nvmet_disc_subsys); - return 0; + return PTR_ERR_OR_ZERO(nvmet_disc_subsys); } void nvmet_exit_discovery(void) From 2d352df57bcd7fee800fa1aa7257e68bfca64d2b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 12 Jul 2019 11:02:09 -0700 Subject: [PATCH 14/18] nvme-fabrics: allow discovery subsystems accept a kato This modifies the behavior of discovery subsystems to accept a kato as a preparation to support discovery log change events. This also means that now every discovery controller will have a default kato value, and for non-persistent connections the host needs to pass in a zero kato value (keep_alive_tmo=0). Reviewed-by: Minwoo Im Reviewed-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fabrics.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 145c210edb03..74b8818ac9a1 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -381,8 +381,8 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) * Set keep-alive timeout in seconds granularity (ms * 1000) * and add a grace period for controller kato enforcement */ - cmd.connect.kato = ctrl->opts->discovery_nqn ? 0 : - cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000); + cmd.connect.kato = ctrl->kato ? + cpu_to_le32((ctrl->kato + NVME_KATO_GRACE) * 1000) : 0; if (ctrl->opts->disable_sqflow) cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; @@ -740,13 +740,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n"); } opts->kato = token; - - if (opts->discovery_nqn && opts->kato) { - pr_err("Discovery controllers cannot accept KATO != 0\n"); - ret = -EINVAL; - goto out; - } - break; case NVMF_OPT_CTRL_LOSS_TMO: if (match_int(args, &token)) { @@ -883,7 +876,6 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } if (opts->discovery_nqn) { - opts->kato = 0; opts->nr_io_queues = 0; opts->nr_write_queues = 0; opts->nr_poll_queues = 0; From 93da40239b1069ef96bbfe7c8d08edb347e8107a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Aug 2019 11:25:46 -0700 Subject: [PATCH 15/18] nvme: enable aen regardless of the presence of I/O queues AENs in general are not related to the presence of I/O queues, so enable them regardless. Note that the only exception is that discovery controller will not support any of the requested AENs and nvme_enable_aen will respect that and return, so it is still safe to enable regardless. Note it is safe to enable AENs even before the initial namespace scanning as we have the scan operation in a workqueue context. Reviewed-by: Minwoo Im Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 55fc0728764e..573e72139331 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1200,6 +1200,8 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl) if (status) dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", supported_aens); + + queue_work(nvme_wq, &ctrl->async_event_work); } static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) @@ -3785,10 +3787,10 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->kato) nvme_start_keep_alive(ctrl); + nvme_enable_aen(ctrl); + if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); - nvme_enable_aen(ctrl); - queue_work(nvme_wq, &ctrl->async_event_work); nvme_start_queues(ctrl); } } From a42f42e5bb84d82f1e9890f33364c4c04997c323 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 4 Sep 2019 14:29:48 -0700 Subject: [PATCH 16/18] nvme: add uevent variables for controller devices When we send uevents to userspace, add controller specific environment variables to uniquly identify the controller beyond its device name. This will be useful to address discovery log change events by actually verifying that the discovery controller is indeed the same as the device that generated the event. Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 573e72139331..7d4e0c6f6d49 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3635,6 +3635,33 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_remove_namespaces); +static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct nvme_ctrl *ctrl = + container_of(dev, struct nvme_ctrl, ctrl_device); + struct nvmf_ctrl_options *opts = ctrl->opts; + int ret; + + ret = add_uevent_var(env, "NVME_TRTYPE=%s", ctrl->ops->name); + if (ret) + return ret; + + if (opts) { + ret = add_uevent_var(env, "NVME_TRADDR=%s", opts->traddr); + if (ret) + return ret; + + ret = add_uevent_var(env, "NVME_TRSVCID=%s", + opts->trsvcid ?: "none"); + if (ret) + return ret; + + ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s", + opts->host_traddr ?: "none"); + } + return ret; +} + static void nvme_aen_uevent(struct nvme_ctrl *ctrl) { char *envp[2] = { NULL, NULL }; @@ -4073,6 +4100,7 @@ static int __init nvme_core_init(void) result = PTR_ERR(nvme_class); goto unregister_chrdev; } + nvme_class->dev_uevent = nvme_class_uevent; nvme_subsys_class = class_create(THIS_MODULE, "nvme-subsystem"); if (IS_ERR(nvme_subsys_class)) { From 85f8a4351dfd75a719a82c681739e7bcf17bbf9e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 12 Jul 2019 11:02:10 -0700 Subject: [PATCH 17/18] nvme: send discovery log page change events to userspace If the controller supports discovery log page change events, we want to enable it. When we see a discovery log change event we will send it up to userspace and expect it to handle it. Reviewed-by: Minwoo Im Reviewed-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7d4e0c6f6d49..81f8b1841b0e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1185,7 +1185,8 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) EXPORT_SYMBOL_GPL(nvme_set_queue_count); #define NVME_AEN_SUPPORTED \ - (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE) + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \ + NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE) static void nvme_enable_aen(struct nvme_ctrl *ctrl) { @@ -3768,6 +3769,9 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) queue_work(nvme_wq, &ctrl->ana_work); break; #endif + case NVME_AER_NOTICE_DISC_CHANGED: + ctrl->aen_result = result; + break; default: dev_warn(ctrl->device, "async event result %08x\n", result); } From 5f8badbcbeac298a77ee634a10a375f3e66923f9 Mon Sep 17 00:00:00 2001 From: Amit Date: Thu, 12 Sep 2019 08:29:39 +0300 Subject: [PATCH 18/18] nvmet: fix a wrong error status returned in error log page When the command data_len cannot hold all the controller errors, we should simply return as much errors as we can fit instead of failing the command. Signed-off-by: Amit Engel Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/target/admin-cmd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 51800a9ce9a9..831a062d27cb 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -37,7 +37,6 @@ static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) static void nvmet_execute_get_log_page_error(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - u16 status = NVME_SC_SUCCESS; unsigned long flags; off_t offset = 0; u64 slot; @@ -47,9 +46,8 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req) slot = ctrl->err_counter % NVMET_ERROR_LOG_SLOTS; for (i = 0; i < NVMET_ERROR_LOG_SLOTS; i++) { - status = nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot], - sizeof(struct nvme_error_slot)); - if (status) + if (nvmet_copy_to_sgl(req, offset, &ctrl->slots[slot], + sizeof(struct nvme_error_slot))) break; if (slot == 0) @@ -59,7 +57,7 @@ static void nvmet_execute_get_log_page_error(struct nvmet_req *req) offset += sizeof(struct nvme_error_slot); } spin_unlock_irqrestore(&ctrl->error_lock, flags); - nvmet_req_complete(req, status); + nvmet_req_complete(req, 0); } static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,