From 66c6afbd7321983be4c4160aff4d79e52af84a60 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 14 Dec 2018 18:31:21 +0000 Subject: [PATCH 01/13] nvmet: fix comparison of a u16 with -1 Currently the u16 req->error_loc is being compared to -1 which will always be false. Fix this by casting -1 to u16 to fix this. Detected by clang: warning: result of comparison of constant -1 with expression of type 'u16' (aka 'unsigned short') is always false [-Wtautological-constant-out-of-range-compare] Fixes: 76574f37bf4c ("nvmet: add interface to update error-log page") Signed-off-by: Colin Ian King Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cc81d0231587..b9c219c931eb 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -663,7 +663,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) req->rsp->status = cpu_to_le16(status << 1); - if (!ctrl || req->error_loc == -1) + if (!ctrl || req->error_loc == (u16)-1) return; spin_lock_irqsave(&ctrl->error_lock, flags); From 5698b805fbf09a5dfa60829f5a179c0568a8dc3b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 17 Dec 2018 18:35:29 -0800 Subject: [PATCH 02/13] nvmet: use a macro for default error location This patch defines a new macro NVMET_NO_ERROR_LOC to represent the default error location value in the nvme-error-log-page. This is a pure cleanup patch and it does not change any functionality. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b9c219c931eb..88d260f31835 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -663,7 +663,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) req->rsp->status = cpu_to_le16(status << 1); - if (!ctrl || req->error_loc == (u16)-1) + if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) return; spin_lock_irqsave(&ctrl->error_lock, flags); @@ -849,7 +849,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->rsp->status = 0; req->rsp->sq_head = 0; req->ns = NULL; - req->error_loc = -1; + req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; /* no support for fused commands yet */ diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 3b5f0bcaf3e8..3e4719fdba85 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -30,6 +30,7 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 +#define NVMET_NO_ERROR_LOC ((u16)-1) /* * Supported optional AENs: From ed92ad37e88555864f1f830db4037b9535b3392c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Dec 2018 14:06:59 +0100 Subject: [PATCH 03/13] nvme-pci: only set nr_maps to 2 if poll queues are supported The block layer now enables polling support on a queue if nr_maps includes the poll map, so we should only set that if we actually support poll queues. Fixes: 6544d229bf ("block: enable polling by default if a poll map is initalized") Signed-off-by: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 698b350b38cf..a3e0b9378e54 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2289,6 +2289,9 @@ static int nvme_dev_add(struct nvme_dev *dev) if (!dev->ctrl.tagset) { dev->tagset.ops = &nvme_mq_ops; dev->tagset.nr_hw_queues = dev->online_queues - 1; + dev->tagset.nr_maps = 2; /* default + read */ + if (dev->io_queues[HCTX_TYPE_POLL]) + dev->tagset.nr_maps++; dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); From 91a509f8b7a8a518723e1755b876b46c537baaef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Dec 2018 09:48:00 +0100 Subject: [PATCH 04/13] nvme-pci: refactor nvme_poll_irqdisable to make sparse happy By duplicating the nvme_process_cq in both branches we keep the sparse lock context checking happy, so do it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a3e0b9378e54..452b28130380 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1089,15 +1089,15 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) * using the CQ lock. For normal interrupt driven threads we have * to disable the interrupt to avoid racing with it. */ - if (nvmeq->cq_vector == -1) + if (nvmeq->cq_vector == -1) { spin_lock(&nvmeq->cq_poll_lock); - else - disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); - found = nvme_process_cq(nvmeq, &start, &end, tag); - if (nvmeq->cq_vector == -1) + found = nvme_process_cq(nvmeq, &start, &end, tag); spin_unlock(&nvmeq->cq_poll_lock); - else + } else { + disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + found = nvme_process_cq(nvmeq, &start, &end, tag); enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector)); + } nvme_complete_cqes(nvmeq, start, end); return found; From f4d10b5c85b5f118c6e55288afd053f5c8c5fc98 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Dec 2018 09:41:15 +0100 Subject: [PATCH 05/13] nvmet-tcp: fix endianess annotations Signed-off-by: Christoph Hellwig Acked-by: Sagi Grimberg --- drivers/nvme/target/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index d31bec260160..44b37b202e39 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -758,7 +758,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) if (icreq->maxr2t != 0) { pr_err("queue %d: unsupported maxr2t %d\n", queue->idx, - le16_to_cpu(icreq->maxr2t) + 1); + le32_to_cpu(icreq->maxr2t) + 1); return -EPROTO; } @@ -776,7 +776,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); - icresp->maxdata = 0xffff; /* FIXME: support r2t */ + icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; From a7273d40232fbc1f89212b973d588aeaa61161b2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Dec 2018 09:46:59 +0100 Subject: [PATCH 06/13] nvme-tcp: fix endianess annotations Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 51826479a41e..83417b4473b3 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -35,7 +35,7 @@ struct nvme_tcp_request { u32 pdu_sent; u16 ttag; struct list_head entry; - u32 ddgst; + __le32 ddgst; struct bio *curr_bio; struct iov_iter iter; @@ -272,7 +272,8 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) return req; } -static inline void nvme_tcp_ddgst_final(struct ahash_request *hash, u32 *dgst) +static inline void nvme_tcp_ddgst_final(struct ahash_request *hash, + __le32 *dgst) { ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0); crypto_ahash_final(hash); @@ -817,7 +818,7 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req) union nvme_result res = {}; nvme_end_request(blk_mq_rq_from_pdu(req), - NVME_SC_DATA_XFER_ERROR, res); + cpu_to_le16(NVME_SC_DATA_XFER_ERROR), res); } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) @@ -1960,7 +1961,7 @@ nvme_tcp_timeout(struct request *rq, bool reserved) union nvme_result res = {}; nvme_req(rq)->flags |= NVME_REQ_CANCELLED; - nvme_end_request(rq, NVME_SC_ABORT_REQ, res); + nvme_end_request(rq, cpu_to_le16(NVME_SC_ABORT_REQ), res); return BLK_EH_DONE; } From 56a77d26d6316a3936497236c7e3a6a98fad950c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 14 Dec 2018 11:42:43 +0000 Subject: [PATCH 07/13] nvme-tcp: fix spelling mistake "attepmpt" -> "attempt" There is a spelling mistake in a dev_info message, fix it. Signed-off-by: Colin Ian King Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 83417b4473b3..f49cbe9aa19f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1790,7 +1790,7 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) if (nvme_tcp_setup_ctrl(ctrl, false)) goto requeue; - dev_info(ctrl->device, "Successfully reconnected (%d attepmpt)\n", + dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", ctrl->nr_reconnects); ctrl->nr_reconnects = 0; From 7b7ab780a048699d2b9f416bf2d5c089d8d1028c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:06 -0800 Subject: [PATCH 08/13] block: make request_to_qc_t public block consumers will need it for polling requests that are sent with blk_execute_rq_nowait. Also, get rid of blk_tag_to_qc_t and open-code it instead. Reviewed-by: Jens Axboe Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- block/blk-mq.c | 8 -------- include/linux/blk-mq.h | 10 ++++++++++ include/linux/blk_types.h | 11 ----------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2de972857496..3ba37b9e15e9 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1749,14 +1749,6 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) blk_account_io_start(rq, true); } -static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq) -{ - if (rq->tag != -1) - return blk_tag_to_qc_t(rq->tag, hctx->queue_num, false); - - return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true); -} - static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, struct request *rq, blk_qc_t *cookie, bool last) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d3c0a0d2680b..0e030f5f76b6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -357,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) +static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag != -1) + return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); + + return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | + BLK_QC_T_INTERNAL; +} + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fc99474ac968..5c7e7f859a24 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -425,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, - bool internal) -{ - blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); - - if (internal) - ret |= BLK_QC_T_INTERNAL; - - return ret; -} - static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; From 6287b51c77e6d8f05f772931cf51d80e81651a9f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:07 -0800 Subject: [PATCH 09/13] nvme-core: optionally poll sync commands Pass poll bool to indicate that we need it to poll. This prepares us for polling support in nvmf since connect is an I/O that will be queued and has to be polled in order to complete. If poll is passed, we call nvme_execute_rq_polled which sends the requests and polls for its completion. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 38 ++++++++++++++++++++++++++++++++----- drivers/nvme/host/fabrics.c | 10 +++++----- drivers/nvme/host/nvme.h | 2 +- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 136512e8ba58..08f2c92602f4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -724,6 +724,31 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, } EXPORT_SYMBOL_GPL(nvme_setup_cmd); +static void nvme_end_sync_rq(struct request *rq, blk_status_t error) +{ + struct completion *waiting = rq->end_io_data; + + rq->end_io_data = NULL; + complete(waiting); +} + +static void nvme_execute_rq_polled(struct request_queue *q, + struct gendisk *bd_disk, struct request *rq, int at_head) +{ + DECLARE_COMPLETION_ONSTACK(wait); + + WARN_ON_ONCE(!test_bit(QUEUE_FLAG_POLL, &q->queue_flags)); + + rq->cmd_flags |= REQ_HIPRI; + rq->end_io_data = &wait; + blk_execute_rq_nowait(q, bd_disk, rq, at_head, nvme_end_sync_rq); + + while (!completion_done(&wait)) { + blk_poll(q, request_to_qc_t(rq->mq_hctx, rq), true); + cond_resched(); + } +} + /* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code @@ -731,7 +756,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags) + blk_mq_req_flags_t flags, bool poll) { struct request *req; int ret; @@ -748,7 +773,10 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, goto out; } - blk_execute_rq(req->q, NULL, req, at_head); + if (poll) + nvme_execute_rq_polled(req->q, NULL, req, at_head); + else + blk_execute_rq(req->q, NULL, req, at_head); if (result) *result = nvme_req(req)->result; if (nvme_req(req)->flags & NVME_REQ_CANCELLED) @@ -765,7 +793,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buffer, unsigned bufflen) { return __nvme_submit_sync_cmd(q, cmd, NULL, buffer, bufflen, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); @@ -1084,7 +1112,7 @@ static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword c.features.dword11 = cpu_to_le32(dword11); ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &res, - buffer, buflen, 0, NVME_QID_ANY, 0, 0); + buffer, buflen, 0, NVME_QID_ANY, 0, 0, false); if (ret >= 0 && result) *result = le32_to_cpu(res.u32); return ret; @@ -1727,7 +1755,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len, cmd.common.cdw11 = cpu_to_le32(len); return __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, buffer, len, - ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0); + ADMIN_TIMEOUT, NVME_QID_ANY, 1, 0, false); } EXPORT_SYMBOL_GPL(nvme_sec_submit); #endif /* CONFIG_BLK_SED_OPAL */ diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 19ff0eae4582..d93a169f6403 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -159,7 +159,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -206,7 +206,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (ret >= 0) *val = le64_to_cpu(res.u64); @@ -252,7 +252,7 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, NULL, NULL, 0, 0, - NVME_QID_ANY, 0, 0); + NVME_QID_ANY, 0, 0, false); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", @@ -406,7 +406,7 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) ret = __nvme_submit_sync_cmd(ctrl->admin_q, &cmd, &res, data, sizeof(*data), 0, NVME_QID_ANY, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); @@ -468,7 +468,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 39b52f4d9b24..2b36ac922596 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -447,7 +447,7 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, - blk_mq_req_flags_t flags); + blk_mq_req_flags_t flags, bool poll); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); From 26c682274e0a7d055e123499eac8ec39d0e04283 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:08 -0800 Subject: [PATCH 10/13] nvme-fabrics: allow nvmf_connect_io_queue to poll Preparation for polling support for fabrics. Polling support means that our completion queues are not generating any interrupts which means we need to poll for the nvmf io queue connect as well. Reviewed by Steve Wise Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 4 ++-- drivers/nvme/host/fabrics.h | 2 +- drivers/nvme/host/fc.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/host/tcp.c | 2 +- drivers/nvme/target/loop.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index d93a169f6403..5ff14f46a8d9 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue); * > 0: NVMe error status code * < 0: Linux errno error code */ -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll) { struct nvme_command cmd; struct nvmf_connect_data *data; @@ -468,7 +468,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), 0, qid, 1, - BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, false); + BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT, poll); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 81b8fd1c0c5d..cad70a9f976a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -168,7 +168,7 @@ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val); int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val); int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val); int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl); -int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid); +int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll); int nvmf_register_transport(struct nvmf_transport_ops *ops); void nvmf_unregister_transport(struct nvmf_transport_ops *ops); void nvmf_free_options(struct nvmf_ctrl_options *opts); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b79e41938513..89accc76d71c 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1975,7 +1975,7 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) (qsize / 5)); if (ret) break; - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); if (ret) break; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ed726da77b5b..b907ed43814f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -598,7 +598,7 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) int ret; if (idx) - ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, false); else ret = nvmf_connect_admin_queue(&ctrl->ctrl); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index f49cbe9aa19f..de174912445e 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1394,7 +1394,7 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) int ret; if (idx) - ret = nvmf_connect_io_queue(nctrl, idx); + ret = nvmf_connect_io_queue(nctrl, idx, false); else ret = nvmf_connect_admin_queue(nctrl); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9908082b32c4..4aac1b4a8112 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -345,7 +345,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); + ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); if (ret) return ret; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); From 89d43802b0e7298bc0b464bdb5fff864074e2218 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:09 -0800 Subject: [PATCH 11/13] nvme-fabrics: allow user to pass in nr_poll_queues This argument will specify how many polling I/O queues to connect when creating the controller. These I/O queues will host I/O that is set with REQ_HIPRI. Reviewed-by: Steve Wise Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 13 +++++++++++++ drivers/nvme/host/fabrics.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5ff14f46a8d9..b2ab213f43de 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -617,6 +617,7 @@ static const match_table_t opt_tokens = { { NVMF_OPT_HDR_DIGEST, "hdr_digest" }, { NVMF_OPT_DATA_DIGEST, "data_digest" }, { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" }, + { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_ERR, NULL } }; @@ -850,6 +851,18 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->nr_write_queues = token; break; + case NVMF_OPT_NR_POLL_QUEUES: + if (match_int(args, &token)) { + ret = -EINVAL; + goto out; + } + if (token <= 0) { + pr_err("Invalid nr_poll_queues %d\n", token); + ret = -EINVAL; + goto out; + } + opts->nr_poll_queues = token; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index cad70a9f976a..478343b73e38 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -62,6 +62,7 @@ enum { NVMF_OPT_HDR_DIGEST = 1 << 15, NVMF_OPT_DATA_DIGEST = 1 << 16, NVMF_OPT_NR_WRITE_QUEUES = 1 << 17, + NVMF_OPT_NR_POLL_QUEUES = 1 << 18, }; /** @@ -93,6 +94,7 @@ enum { * @hdr_digest: generate/verify header digest (TCP) * @data_digest: generate/verify data digest (TCP) * @nr_write_queues: number of queues for write I/O + * @nr_poll_queues: number of queues for polling I/O */ struct nvmf_ctrl_options { unsigned mask; @@ -113,6 +115,7 @@ struct nvmf_ctrl_options { bool hdr_digest; bool data_digest; unsigned int nr_write_queues; + unsigned int nr_poll_queues; }; /* From ff8519f9e91170cd24ed503aa77d144e71bd4d92 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:10 -0800 Subject: [PATCH 12/13] nvme-rdma: implement polling queue map When passed with nr_poll_queues setup additional queues with cq polling context IB_POLL_DIRECT (no interrupts) and make sure to set QUEUE_FLAG_POLL on the connect_q. In addition add the third queue mapping for polling queues. nvmf connect on this queue is polled for like all other requests so make nvmf_connect_io_queue poll for polling queues. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 49 +++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b907ed43814f..0a2fd2949ad7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -162,6 +162,13 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue) return queue - queue->ctrl->queues; } +static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue) +{ + return nvme_rdma_queue_idx(queue) > + queue->ctrl->ctrl.opts->nr_io_queues + + queue->ctrl->ctrl.opts->nr_write_queues; +} + static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue) { return queue->cmnd_capsule_len - sizeof(struct nvme_command); @@ -440,6 +447,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) const int send_wr_factor = 3; /* MR, SEND, INV */ const int cq_factor = send_wr_factor + 1; /* + RECV */ int comp_vector, idx = nvme_rdma_queue_idx(queue); + enum ib_poll_context poll_ctx; int ret; queue->device = nvme_rdma_find_get_device(queue->cm_id); @@ -456,10 +464,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) */ comp_vector = idx == 0 ? idx : idx - 1; + /* Polling queues need direct cq polling context */ + if (nvme_rdma_poll_queue(queue)) + poll_ctx = IB_POLL_DIRECT; + else + poll_ctx = IB_POLL_SOFTIRQ; + /* +1 for ib_stop_cq */ queue->ib_cq = ib_alloc_cq(ibdev, queue, cq_factor * queue->queue_size + 1, - comp_vector, IB_POLL_SOFTIRQ); + comp_vector, poll_ctx); if (IS_ERR(queue->ib_cq)) { ret = PTR_ERR(queue->ib_cq); goto out_put_dev; @@ -595,15 +609,17 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) { + struct nvme_rdma_queue *queue = &ctrl->queues[idx]; + bool poll = nvme_rdma_poll_queue(queue); int ret; if (idx) - ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, false); + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll); else ret = nvmf_connect_admin_queue(&ctrl->ctrl); if (!ret) - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); + set_bit(NVME_RDMA_Q_LIVE, &queue->flags); else dev_info(ctrl->ctrl.device, "failed to connect queue: %d ret=%d\n", idx, ret); @@ -646,6 +662,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) ibdev->num_comp_vectors); nr_io_queues += min(opts->nr_write_queues, num_online_cpus()); + nr_io_queues += min(opts->nr_poll_queues, num_online_cpus()); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) @@ -716,7 +733,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = 2 /* default + read */; + set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; } ret = blk_mq_alloc_tag_set(set); @@ -1742,6 +1759,13 @@ err: return BLK_STS_IOERR; } +static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) +{ + struct nvme_rdma_queue *queue = hctx->driver_data; + + return ib_process_cq_direct(queue->ib_cq, -1); +} + static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); @@ -1772,6 +1796,17 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) ctrl->device->dev, 0); blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ], ctrl->device->dev, 0); + + if (ctrl->ctrl.opts->nr_poll_queues) { + set->map[HCTX_TYPE_POLL].nr_queues = + ctrl->ctrl.opts->nr_poll_queues; + set->map[HCTX_TYPE_POLL].queue_offset = + ctrl->ctrl.opts->nr_io_queues; + if (ctrl->ctrl.opts->nr_write_queues) + set->map[HCTX_TYPE_POLL].queue_offset += + ctrl->ctrl.opts->nr_write_queues; + blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); + } return 0; } @@ -1783,6 +1818,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = { .init_hctx = nvme_rdma_init_hctx, .timeout = nvme_rdma_timeout, .map_queues = nvme_rdma_map_queues, + .poll = nvme_rdma_poll, }; static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { @@ -1927,7 +1963,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work); - ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1; + ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + + opts->nr_poll_queues + 1; ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; @@ -1979,7 +2016,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .required_opts = NVMF_OPT_TRADDR, .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | - NVMF_OPT_NR_WRITE_QUEUES, + NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES, .create_ctrl = nvme_rdma_create_ctrl, }; From 604c01d567cb9ee7d19dc598272cb90ab6229a8a Mon Sep 17 00:00:00 2001 From: yupeng Date: Tue, 18 Dec 2018 17:59:53 +0100 Subject: [PATCH 13/13] nvme-pci: trace SQ status on completions Export the disk name, queue id, sq_head, sq_tail to a trace event in completion handling. Usage example: cd /sys/kernel/debug/tracing/events/nvme/nvme_sq echo 'disk=="nvme1n1"' > filter echo 1 > enable cat /sys/kernel/debug/tracing/trace_pipe Signed-off-by: yupeng Reviewed-by: Sagi Grimberg Reviewed-by: Keith Busch [hch: slight formatting tweaks, use standard nvme tracepoint conventions] Signed-off-by: Christoph Hellwig wip --- drivers/nvme/host/pci.c | 2 ++ drivers/nvme/host/trace.c | 3 +++ drivers/nvme/host/trace.h | 23 +++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 452b28130380..5a0bf6a24d50 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -32,6 +32,7 @@ #include #include +#include "trace.h" #include "nvme.h" #define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) @@ -1003,6 +1004,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id); + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); nvme_end_request(req, cqe->status, cqe->result); } diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c index 25b0e310f4a8..5566dda3237a 100644 --- a/drivers/nvme/host/trace.c +++ b/drivers/nvme/host/trace.c @@ -139,3 +139,6 @@ const char *nvme_trace_disk_name(struct trace_seq *p, char *name) return ret; } +EXPORT_SYMBOL_GPL(nvme_trace_disk_name); + +EXPORT_TRACEPOINT_SYMBOL_GPL(nvme_sq); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 1978deb6fcc7..3564120aa7b3 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -184,6 +184,29 @@ TRACE_EVENT(nvme_async_event, #undef aer_name +TRACE_EVENT(nvme_sq, + TP_PROTO(struct request *req, __le16 sq_head, int sq_tail), + TP_ARGS(req, sq_head, sq_tail), + TP_STRUCT__entry( + __field(int, ctrl_id) + __array(char, disk, DISK_NAME_LEN) + __field(int, qid) + __field(u16, sq_head) + __field(u16, sq_tail) + ), + TP_fast_assign( + __entry->ctrl_id = nvme_req(req)->ctrl->instance; + __assign_disk_name(__entry->disk, req->rq_disk); + __entry->qid = nvme_req_qid(req); + __entry->sq_head = le16_to_cpu(sq_head); + __entry->sq_tail = sq_tail; + ), + TP_printk("nvme%d: %sqid=%d, head=%u, tail=%u", + __entry->ctrl_id, __print_disk_name(__entry->disk), + __entry->qid, __entry->sq_head, __entry->sq_tail + ) +); + #endif /* _TRACE_NVME_H */ #undef TRACE_INCLUDE_PATH