nvme-rdma: Support ctrl_loss_tmo

Before scheduling a reconnect attempt, check
nr_reconnects against max_reconnects, if not
exhausted (or max_reconnects is not -1), schedule
a reconnect attempts, otherwise schedule ctrl
removal.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
Sagi Grimberg 2017-03-18 20:58:29 +02:00 committed by Jens Axboe
parent 42a45274c2
commit fd8563ced8

View File

@ -706,6 +706,26 @@ free_ctrl:
kfree(ctrl);
}
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
}
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_info(ctrl->ctrl.device, "Removing controller...\n");
queue_work(nvme_rdma_wq, &ctrl->delete_work);
}
}
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
@ -713,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
bool changed;
int ret;
++ctrl->ctrl.opts->nr_reconnects;
if (ctrl->queue_count > 1) {
nvme_rdma_free_io_queues(ctrl);
@ -757,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
ctrl->ctrl.opts->nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
@ -771,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
stop_admin_q:
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
requeue:
/* Make sure we are not resetting/deleting */
if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
dev_info(ctrl->ctrl.device,
"Failed reconnect attempt, requeueing...\n");
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
}
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.opts->nr_reconnects);
nvme_rdma_reconnect_or_remove(ctrl);
}
static void nvme_rdma_error_recovery_work(struct work_struct *work)
@ -804,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl);
dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n",
ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
nvme_rdma_reconnect_or_remove(ctrl);
}
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
@ -1986,7 +2001,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
.name = "rdma",
.required_opts = NVMF_OPT_TRADDR,
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR,
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
.create_ctrl = nvme_rdma_create_ctrl,
};