scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands
In the *_done() functions, instead of returning early if sp->ref_count >= 2, only decrement sp->ref_count. In qla2xxx_eh_abort(), instead of deciding what to do based on the value of sp->ref_count, decide which action to take depending on the completion status of the firmware abort. Remove srb.cwaitq and use srb.comp instead. In qla2x00_abort_srb(), call isp_ops->abort_command() directly instead of calling qla2xxx_eh_abort(). Cc: Himanshu Madhani <hmadhani@marvell.com> Cc: Giridhar Malavali <gmalavali@marvell.com> Signed-off-by: Bart Van Assche <bvanassche@acm.org> Acked-by: Himanshu Madhani <hmadhani@marvell.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
982cc4be05
commit
219d27d714
@ -546,7 +546,6 @@ typedef struct srb {
|
|||||||
int rc;
|
int rc;
|
||||||
int retry_count;
|
int retry_count;
|
||||||
struct completion *comp;
|
struct completion *comp;
|
||||||
wait_queue_head_t *cwaitq;
|
|
||||||
union {
|
union {
|
||||||
struct srb_iocb iocb_cmd;
|
struct srb_iocb iocb_cmd;
|
||||||
struct bsg_job *bsg_job;
|
struct bsg_job *bsg_job;
|
||||||
|
@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!atomic_dec_and_test(&sp->ref_count))
|
atomic_dec(&sp->ref_count);
|
||||||
return;
|
|
||||||
|
|
||||||
if (res)
|
if (res)
|
||||||
res = -EINVAL;
|
res = -EINVAL;
|
||||||
@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
|
|||||||
nvme = &sp->u.iocb_cmd;
|
nvme = &sp->u.iocb_cmd;
|
||||||
fd = nvme->u.nvme.desc;
|
fd = nvme->u.nvme.desc;
|
||||||
|
|
||||||
if (!atomic_dec_and_test(&sp->ref_count))
|
atomic_dec(&sp->ref_count);
|
||||||
return;
|
|
||||||
|
|
||||||
if (res == QLA_SUCCESS) {
|
if (res == QLA_SUCCESS) {
|
||||||
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
|
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
|
||||||
@ -599,34 +597,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
|
|||||||
.fcprqst_priv_sz = sizeof(struct nvme_private),
|
.fcprqst_priv_sz = sizeof(struct nvme_private),
|
||||||
};
|
};
|
||||||
|
|
||||||
#define NVME_ABORT_POLLING_PERIOD 2
|
|
||||||
static int qla_nvme_wait_on_command(srb_t *sp)
|
|
||||||
{
|
|
||||||
int ret = QLA_SUCCESS;
|
|
||||||
|
|
||||||
wait_event_timeout(sp->nvme_ls_waitq, (atomic_read(&sp->ref_count) > 1),
|
|
||||||
NVME_ABORT_POLLING_PERIOD*HZ);
|
|
||||||
|
|
||||||
if (atomic_read(&sp->ref_count) > 1)
|
|
||||||
ret = QLA_FUNCTION_FAILED;
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void qla_nvme_abort(struct qla_hw_data *ha, struct srb *sp, int res)
|
|
||||||
{
|
|
||||||
int rval;
|
|
||||||
|
|
||||||
if (ha->flags.fw_started) {
|
|
||||||
rval = ha->isp_ops->abort_command(sp);
|
|
||||||
if (!rval && !qla_nvme_wait_on_command(sp))
|
|
||||||
ql_log(ql_log_warn, NULL, 0x2112,
|
|
||||||
"timed out waiting on sp=%p\n", sp);
|
|
||||||
} else {
|
|
||||||
sp->done(sp, res);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void qla_nvme_unregister_remote_port(struct work_struct *work)
|
static void qla_nvme_unregister_remote_port(struct work_struct *work)
|
||||||
{
|
{
|
||||||
struct fc_port *fcport = container_of(work, struct fc_port,
|
struct fc_port *fcport = container_of(work, struct fc_port,
|
||||||
|
@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol {
|
|||||||
int qla_nvme_register_hba(struct scsi_qla_host *);
|
int qla_nvme_register_hba(struct scsi_qla_host *);
|
||||||
int qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
|
int qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
|
||||||
void qla_nvme_delete(struct scsi_qla_host *);
|
void qla_nvme_delete(struct scsi_qla_host *);
|
||||||
void qla_nvme_abort(struct qla_hw_data *, struct srb *sp, int res);
|
|
||||||
void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
|
void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
|
||||||
struct req_que *);
|
struct req_que *);
|
||||||
void qla24xx_async_gffid_sp_done(void *, int);
|
void qla24xx_async_gffid_sp_done(void *, int);
|
||||||
|
@ -714,7 +714,7 @@ qla2x00_sp_compl(void *ptr, int res)
|
|||||||
{
|
{
|
||||||
srb_t *sp = ptr;
|
srb_t *sp = ptr;
|
||||||
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
|
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
|
||||||
wait_queue_head_t *cwaitq = sp->cwaitq;
|
struct completion *comp = sp->comp;
|
||||||
|
|
||||||
if (atomic_read(&sp->ref_count) == 0) {
|
if (atomic_read(&sp->ref_count) == 0) {
|
||||||
ql_dbg(ql_dbg_io, sp->vha, 0x3015,
|
ql_dbg(ql_dbg_io, sp->vha, 0x3015,
|
||||||
@ -724,15 +724,15 @@ qla2x00_sp_compl(void *ptr, int res)
|
|||||||
WARN_ON(atomic_read(&sp->ref_count) == 0);
|
WARN_ON(atomic_read(&sp->ref_count) == 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!atomic_dec_and_test(&sp->ref_count))
|
|
||||||
return;
|
atomic_dec(&sp->ref_count);
|
||||||
|
|
||||||
sp->free(sp);
|
sp->free(sp);
|
||||||
cmd->result = res;
|
cmd->result = res;
|
||||||
CMD_SP(cmd) = NULL;
|
CMD_SP(cmd) = NULL;
|
||||||
cmd->scsi_done(cmd);
|
cmd->scsi_done(cmd);
|
||||||
if (cwaitq)
|
if (comp)
|
||||||
wake_up(cwaitq);
|
complete(comp);
|
||||||
qla2x00_rel_sp(sp);
|
qla2x00_rel_sp(sp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -825,7 +825,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
|
|||||||
{
|
{
|
||||||
srb_t *sp = ptr;
|
srb_t *sp = ptr;
|
||||||
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
|
struct scsi_cmnd *cmd = GET_CMD_SP(sp);
|
||||||
wait_queue_head_t *cwaitq = sp->cwaitq;
|
struct completion *comp = sp->comp;
|
||||||
|
|
||||||
if (atomic_read(&sp->ref_count) == 0) {
|
if (atomic_read(&sp->ref_count) == 0) {
|
||||||
ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
|
ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
|
||||||
@ -835,15 +835,15 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
|
|||||||
WARN_ON(atomic_read(&sp->ref_count) == 0);
|
WARN_ON(atomic_read(&sp->ref_count) == 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!atomic_dec_and_test(&sp->ref_count))
|
|
||||||
return;
|
atomic_dec(&sp->ref_count);
|
||||||
|
|
||||||
sp->free(sp);
|
sp->free(sp);
|
||||||
cmd->result = res;
|
cmd->result = res;
|
||||||
CMD_SP(cmd) = NULL;
|
CMD_SP(cmd) = NULL;
|
||||||
cmd->scsi_done(cmd);
|
cmd->scsi_done(cmd);
|
||||||
if (cwaitq)
|
if (comp)
|
||||||
wake_up(cwaitq);
|
complete(comp);
|
||||||
qla2xxx_rel_qpair_sp(sp->qpair, sp);
|
qla2xxx_rel_qpair_sp(sp->qpair, sp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1286,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
|
|||||||
unsigned int id;
|
unsigned int id;
|
||||||
uint64_t lun;
|
uint64_t lun;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
int rval, wait = 0;
|
int rval;
|
||||||
struct qla_hw_data *ha = vha->hw;
|
struct qla_hw_data *ha = vha->hw;
|
||||||
struct qla_qpair *qpair;
|
struct qla_qpair *qpair;
|
||||||
|
|
||||||
@ -1299,7 +1299,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
|
|||||||
ret = fc_block_scsi_eh(cmd);
|
ret = fc_block_scsi_eh(cmd);
|
||||||
if (ret != 0)
|
if (ret != 0)
|
||||||
return ret;
|
return ret;
|
||||||
ret = SUCCESS;
|
|
||||||
|
|
||||||
sp = (srb_t *) CMD_SP(cmd);
|
sp = (srb_t *) CMD_SP(cmd);
|
||||||
if (!sp)
|
if (!sp)
|
||||||
@ -1310,7 +1309,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
|
|||||||
return SUCCESS;
|
return SUCCESS;
|
||||||
|
|
||||||
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
|
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
|
||||||
if (!CMD_SP(cmd)) {
|
if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
|
||||||
/* there's a chance an interrupt could clear
|
/* there's a chance an interrupt could clear
|
||||||
the ptr as part of done & free */
|
the ptr as part of done & free */
|
||||||
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
|
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
|
||||||
@ -1331,66 +1330,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
|
|||||||
"Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
|
"Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
|
||||||
vha->host_no, id, lun, sp, cmd, sp->handle);
|
vha->host_no, id, lun, sp, cmd, sp->handle);
|
||||||
|
|
||||||
/* Get a reference to the sp and drop the lock.*/
|
|
||||||
rval = ha->isp_ops->abort_command(sp);
|
rval = ha->isp_ops->abort_command(sp);
|
||||||
if (rval) {
|
ql_dbg(ql_dbg_taskm, vha, 0x8003,
|
||||||
if (rval == QLA_FUNCTION_PARAMETER_ERROR)
|
"Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
|
||||||
ret = SUCCESS;
|
|
||||||
else
|
|
||||||
ret = FAILED;
|
|
||||||
|
|
||||||
ql_dbg(ql_dbg_taskm, vha, 0x8003,
|
switch (rval) {
|
||||||
"Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
|
case QLA_SUCCESS:
|
||||||
} else {
|
|
||||||
ql_dbg(ql_dbg_taskm, vha, 0x8004,
|
|
||||||
"Abort command mbx success cmd=%p.\n", cmd);
|
|
||||||
wait = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_lock_irqsave(qpair->qp_lock_ptr, flags);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Releasing of the SRB and associated command resources
|
|
||||||
* is managed through ref_count.
|
|
||||||
* Whether we need to wait for the abort completion or complete
|
|
||||||
* the abort handler should be based on the ref_count.
|
|
||||||
*/
|
|
||||||
if (atomic_read(&sp->ref_count) > 1) {
|
|
||||||
/*
|
/*
|
||||||
* The command is not yet completed. We need to wait for either
|
* The command has been aborted. That means that the firmware
|
||||||
* command completion or abort completion.
|
* won't report a completion.
|
||||||
*/
|
*/
|
||||||
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq);
|
sp->done(sp, DID_ABORT << 16);
|
||||||
uint32_t ratov = ha->r_a_tov/10;
|
ret = SUCCESS;
|
||||||
|
break;
|
||||||
/* Go ahead and release the extra ref_count obtained earlier */
|
default:
|
||||||
sp->done(sp, DID_RESET << 16);
|
/*
|
||||||
sp->cwaitq = &eh_waitq;
|
* Either abort failed or abort and completion raced. Let
|
||||||
|
* the SCSI core retry the abort in the former case.
|
||||||
if (!wait_event_lock_irq_timeout(eh_waitq,
|
*/
|
||||||
CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr,
|
ret = FAILED;
|
||||||
msecs_to_jiffies(4 * ratov * 1000))) {
|
break;
|
||||||
/*
|
|
||||||
* The abort got dropped, LOGO will be sent and the
|
|
||||||
* original command will be completed with CS_TIMEOUT
|
|
||||||
* completion
|
|
||||||
*/
|
|
||||||
ql_dbg(ql_dbg_taskm, vha, 0xffff,
|
|
||||||
"%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
|
|
||||||
__func__, ha->r_a_tov);
|
|
||||||
sp->cwaitq = NULL;
|
|
||||||
ret = FAILED;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* Command completed while processing the abort */
|
|
||||||
sp->done(sp, DID_RESET << 16);
|
|
||||||
}
|
}
|
||||||
end:
|
|
||||||
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
|
|
||||||
ql_log(ql_log_info, vha, 0x801c,
|
ql_log(ql_log_info, vha, 0x801c,
|
||||||
"Abort command issued nexus=%ld:%d:%llu -- %d %x.\n",
|
"Abort command issued nexus=%ld:%d:%llu -- %x.\n",
|
||||||
vha->host_no, id, lun, wait, ret);
|
vha->host_no, id, lun, ret);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1766,42 +1730,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
|
|||||||
__releases(qp->qp_lock_ptr)
|
__releases(qp->qp_lock_ptr)
|
||||||
__acquires(qp->qp_lock_ptr)
|
__acquires(qp->qp_lock_ptr)
|
||||||
{
|
{
|
||||||
|
DECLARE_COMPLETION_ONSTACK(comp);
|
||||||
scsi_qla_host_t *vha = qp->vha;
|
scsi_qla_host_t *vha = qp->vha;
|
||||||
struct qla_hw_data *ha = vha->hw;
|
struct qla_hw_data *ha = vha->hw;
|
||||||
|
int rval;
|
||||||
|
|
||||||
if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) {
|
if (sp_get(sp))
|
||||||
if (!sp_get(sp)) {
|
return;
|
||||||
/* got sp */
|
|
||||||
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
|
|
||||||
qla_nvme_abort(ha, sp, res);
|
|
||||||
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
|
|
||||||
}
|
|
||||||
} else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
|
|
||||||
!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
|
|
||||||
!qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
|
|
||||||
/*
|
|
||||||
* Don't abort commands in adapter during EEH recovery as it's
|
|
||||||
* not accessible/responding.
|
|
||||||
*
|
|
||||||
* Get a reference to the sp and drop the lock. The reference
|
|
||||||
* ensures this sp->done() call and not the call in
|
|
||||||
* qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
|
|
||||||
*/
|
|
||||||
if (!sp_get(sp)) {
|
|
||||||
int status;
|
|
||||||
|
|
||||||
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
|
if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
|
||||||
status = qla2xxx_eh_abort(GET_CMD_SP(sp));
|
(sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
|
||||||
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
|
!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
|
||||||
/*
|
!qla2x00_isp_reg_stat(ha))) {
|
||||||
* Get rid of extra reference caused
|
sp->comp = ∁
|
||||||
* by early exit from qla2xxx_eh_abort
|
rval = ha->isp_ops->abort_command(sp);
|
||||||
*/
|
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
|
||||||
if (status == FAST_IO_FAIL)
|
|
||||||
atomic_dec(&sp->ref_count);
|
switch (rval) {
|
||||||
|
case QLA_SUCCESS:
|
||||||
|
sp->done(sp, res);
|
||||||
|
break;
|
||||||
|
case QLA_FUNCTION_PARAMETER_ERROR:
|
||||||
|
wait_for_completion(&comp);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
|
||||||
|
sp->comp = NULL;
|
||||||
}
|
}
|
||||||
sp->done(sp, res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
Loading…
Reference in New Issue
Block a user