nvme_fc: correct abort race condition on resets

During reset handling, live io is still completing while the reset
takes place. The reset path attempts to abort all outstanding io,
counting the number of ios it aborts, and then waits for those ios to
be reclaimed from the lldd before continuing.
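
For reference, the waiting side of this count-and-wait scheme (in
nvme_fc_delete_association(), not shown in the diff below) follows
roughly this pattern; this is a paraphrase for illustration, not the
verbatim source:

	spin_lock_irq(&ctrl->lock);
	ctrl->flags |= FCCTRL_TERMIO;	/* teardown in progress */
	ctrl->iocnt = 0;
	spin_unlock_irq(&ctrl->lock);

	/* ... abort all outstanding ios; each counted abort bumps
	 * ctrl->iocnt, each reclaimed io decrements it ... */

	spin_lock_irq(&ctrl->lock);
	wait_event_lock_irq(ctrl->ioabort_wait,
			    ctrl->iocnt == 0, ctrl->lock);
	ctrl->flags &= ~FCCTRL_TERMIO;
	spin_unlock_irq(&ctrl->lock);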

The transport's logic on io state and flag setting was loose,
allowing ios to complete concurrently with the abort request. Such
completed ios were still counted toward the abort total, but as their
completion had already run, the completion path never decremented the
count. As the count never reaches zero, the reset/delete never
completes.
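
Reduced to a userspace sketch (C11 atomics; iocnt and the TERMIO mark
are the driver's names, the helpers are illustrative), the old scheme
looked like this:

	#include <stdatomic.h>

	static _Atomic int iocnt;	/* aborts the reset waits on */

	struct op {
		_Atomic int termio;	/* "count me on completion" */
	};

	/* Reset path (old scheme): mark the op, bump the count, then
	 * ask the lldd to abort it. Nothing orders this against the
	 * op's completion, which may already be underway. */
	void abort_one(struct op *op)
	{
		atomic_store(&op->termio, 1);
		atomic_fetch_add(&iocnt, 1);
		/* ... request the abort from the lldd ... */
	}

	/* Completion path (old scheme): decrement only if marked. A
	 * completion that ran before abort_one() set the mark skips
	 * the decrement, yet abort_one() still increments, so iocnt
	 * never drains back to zero and the reset waits forever. */
	void complete_one(struct op *op)
	{
		if (atomic_exchange(&op->termio, 0))
			atomic_fetch_sub(&iocnt, 1);
	}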

Tighten it up by unconditionally changing the op state to completed
when the io done handler is called. The reset/abort path now changes
the op state to aborted, but the abort only continues if the op state
was previously live; if the op had already completed, the abort is
backed out. Thus the counting of io aborts and their completions
works correctly again.
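
The fix as the same kind of sketch: one atomic exchange on the op
state decides the race, so exactly one side wins (illustrative
userspace C again; in the driver the abort side additionally holds
ctrl->lock so the iocnt bump and the back-out cannot interleave with
the teardown decrement):

	#include <stdatomic.h>
	#include <stdbool.h>

	enum { OP_IDLE, OP_ACTIVE, OP_ABORTED, OP_COMPLETE };

	struct op {
		_Atomic int state;
	};

	/* Done handler: unconditionally claim completion; the
	 * returned previous state says whether an abort won. */
	int op_complete(struct op *op)
	{
		return atomic_exchange(&op->state, OP_COMPLETE);
	}

	/* Abort path: claim the op, but back out if it was not live.
	 * Only the side whose exchange observes OP_ACTIVE owns the
	 * op, so an io is either counted and later decremented, or
	 * never counted at all. */
	bool op_abort(struct op *op)
	{
		int prev = atomic_exchange(&op->state, OP_ABORTED);

		if (prev != OP_ACTIVE) {
			atomic_store(&op->state, prev);	/* lost race */
			return false;
		}
		return true;	/* won: safe to count and abort */
	}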

Also remove the TERMIO flag on the op, as it is redundant with the
op's aborted state.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Author:    James Smart <james.smart@broadcom.com>
Date:      2018-02-06 06:48:29 -08:00
Committer: Sagi Grimberg <sagi@grimberg.me>
parent     8cb6af7b3a
commit     3efd6e8ebe


diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1512,13 +1512,19 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 static int
 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
 {
-	int state;
+	unsigned long flags;
+	int opstate;
+
+	spin_lock_irqsave(&ctrl->lock, flags);
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
+	if (opstate != FCPOP_STATE_ACTIVE)
+		atomic_set(&op->state, opstate);
+	else if (ctrl->flags & FCCTRL_TERMIO)
+		ctrl->iocnt++;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
-	state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
-	if (state != FCPOP_STATE_ACTIVE) {
-		atomic_set(&op->state, state);
+	if (opstate != FCPOP_STATE_ACTIVE)
 		return -ECANCELED;
-	}
 
 	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
 					&ctrl->rport->remoteport,
@@ -1532,52 +1538,23 @@ static void
 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
-	unsigned long flags;
-	int i, ret;
-
-	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
-		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
-			continue;
-
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO) {
-			ctrl->iocnt++;
-			aen_op->flags |= FCOP_FLAGS_TERMIO;
-		}
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-
-		ret = __nvme_fc_abort_op(ctrl, aen_op);
-		if (ret) {
-			/*
-			 * if __nvme_fc_abort_op failed the io wasn't
-			 * active. Thus this call path is running in
-			 * parallel to the io complete. Treat as non-error.
-			 */
-
-			/* back out the flags/counters */
-			spin_lock_irqsave(&ctrl->lock, flags);
-			if (ctrl->flags & FCCTRL_TERMIO)
-				ctrl->iocnt--;
-			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
-			spin_unlock_irqrestore(&ctrl->lock, flags);
-			return;
-		}
-	}
+	int i;
+
+	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
+		__nvme_fc_abort_op(ctrl, aen_op);
 }
 
 static inline int
 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
-		struct nvme_fc_fcp_op *op)
+		struct nvme_fc_fcp_op *op, int opstate)
 {
 	unsigned long flags;
 	bool complete_rq = false;
 
 	spin_lock_irqsave(&ctrl->lock, flags);
-	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
-		if (ctrl->flags & FCCTRL_TERMIO) {
-			if (!--ctrl->iocnt)
-				wake_up(&ctrl->ioabort_wait);
-		}
+	if (opstate == FCPOP_STATE_ABORTED && ctrl->flags & FCCTRL_TERMIO) {
+		if (!--ctrl->iocnt)
+			wake_up(&ctrl->ioabort_wait);
 	}
 	if (op->flags & FCOP_FLAGS_RELEASED)
 		complete_rq = true;
@@ -1601,6 +1578,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
 	bool terminate_assoc = true;
+	int opstate;
 
 	/*
 	 * WARNING:
@@ -1639,11 +1617,12 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * association to be terminated.
 	 */
 
+	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
+
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
 				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
-	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED ||
-	    op->flags & FCOP_FLAGS_TERMIO)
+	if (opstate == FCPOP_STATE_ABORTED)
 		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
 	else if (freq->status)
 		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
@@ -1708,7 +1687,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
-		__nvme_fc_fcpop_chk_teardowns(ctrl, op);
+		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
@@ -1725,7 +1704,7 @@ done:
 			ctrl->ctrl.state == NVME_CTRL_CONNECTING))
 		status |= cpu_to_le16(NVME_SC_DNR << 1);
 
-	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op))
+	if (__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate))
 		__nvme_fc_final_op_cleanup(rq);
 	else
 		nvme_end_request(rq, status, result);
@@ -2421,8 +2400,7 @@ __nvme_fc_final_op_cleanup(struct request *rq)
 	struct nvme_fc_ctrl *ctrl = op->ctrl;
 
 	atomic_set(&op->state, FCPOP_STATE_IDLE);
-	op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED |
-			FCOP_FLAGS_COMPLETE);
+	op->flags &= ~(FCOP_FLAGS_RELEASED | FCOP_FLAGS_COMPLETE);
 
 	nvme_fc_unmap_data(ctrl, rq, op);
 	nvme_complete_rq(rq);
@@ -2476,35 +2454,11 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 	struct nvme_ctrl *nctrl = data;
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
-	unsigned long flags;
-	int status;
 
 	if (!blk_mq_request_started(req))
 		return;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
-	if (ctrl->flags & FCCTRL_TERMIO) {
-		ctrl->iocnt++;
-		op->flags |= FCOP_FLAGS_TERMIO;
-	}
-	spin_unlock_irqrestore(&ctrl->lock, flags);
-
-	status = __nvme_fc_abort_op(ctrl, op);
-	if (status) {
-		/*
-		 * if __nvme_fc_abort_op failed the io wasn't
-		 * active. Thus this call path is running in
-		 * parallel to the io complete. Treat as non-error.
-		 */
-
-		/* back out the flags/counters */
-		spin_lock_irqsave(&ctrl->lock, flags);
-		if (ctrl->flags & FCCTRL_TERMIO)
-			ctrl->iocnt--;
-		op->flags &= ~FCOP_FLAGS_TERMIO;
-		spin_unlock_irqrestore(&ctrl->lock, flags);
-		return;
-	}
+	__nvme_fc_abort_op(ctrl, op);
 }