scsi: qla2xxx: Flush mailbox commands on chip reset

Fix race condition between Interrupt thread and Chip reset thread in trying
to flush the same mailbox. With the race condition, the "ha->mbx_intr_comp"
will get an extra complete() call. The extra complete call create erroneous
mailbox timeout condition when the next mailbox is sent where the mailbox
call does not wait for interrupt to arrive. Instead, it advances without
waiting.

Add lock protection around the check for mailbox completion.

Cc: stable@vger.kernel.org
Fixes: b2000805a9 ("scsi: qla2xxx: Flush mailbox commands on chip reset")
Signed-off-by: Quinn Tran <quinn.tran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Link: https://lore.kernel.org/r/20230821130045.34850-3-njavali@marvell.com
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
Quinn Tran 2023-08-21 18:30:38 +05:30 committed by Martin K. Petersen
parent 875386b988
commit 6d0b65569c
4 changed files with 4 additions and 9 deletions

View File

@ -4418,7 +4418,6 @@ struct qla_hw_data {
uint8_t aen_mbx_count;
atomic_t num_pend_mbx_stage1;
atomic_t num_pend_mbx_stage2;
atomic_t num_pend_mbx_stage3;
uint16_t frame_payload_size;
uint32_t login_retry_count;

View File

@ -7391,14 +7391,15 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
}
/* purge MBox commands */
if (atomic_read(&ha->num_pend_mbx_stage3)) {
spin_lock_irqsave(&ha->hardware_lock, flags);
if (test_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags)) {
clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
complete(&ha->mbx_intr_comp);
}
spin_unlock_irqrestore(&ha->hardware_lock, flags);
i = 0;
while (atomic_read(&ha->num_pend_mbx_stage3) ||
atomic_read(&ha->num_pend_mbx_stage2) ||
while (atomic_read(&ha->num_pend_mbx_stage2) ||
atomic_read(&ha->num_pend_mbx_stage1)) {
msleep(20);
i++;

View File

@ -273,7 +273,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
spin_unlock_irqrestore(&ha->hardware_lock, flags);
wait_time = jiffies;
atomic_inc(&ha->num_pend_mbx_stage3);
if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
mcp->tov * HZ)) {
ql_dbg(ql_dbg_mbx, vha, 0x117a,
@ -290,7 +289,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
spin_unlock_irqrestore(&ha->hardware_lock,
flags);
atomic_dec(&ha->num_pend_mbx_stage2);
atomic_dec(&ha->num_pend_mbx_stage3);
rval = QLA_ABORTED;
goto premature_exit;
}
@ -302,11 +300,9 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
ha->flags.mbox_busy = 0;
spin_unlock_irqrestore(&ha->hardware_lock, flags);
atomic_dec(&ha->num_pend_mbx_stage2);
atomic_dec(&ha->num_pend_mbx_stage3);
rval = QLA_ABORTED;
goto premature_exit;
}
atomic_dec(&ha->num_pend_mbx_stage3);
if (time_after(jiffies, wait_time + 5 * HZ))
ql_log(ql_log_warn, vha, 0x1015, "cmd=0x%x, waited %d msecs\n",

View File

@ -3008,7 +3008,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
ha->max_exchg = FW_MAX_EXCHANGES_CNT;
atomic_set(&ha->num_pend_mbx_stage1, 0);
atomic_set(&ha->num_pend_mbx_stage2, 0);
atomic_set(&ha->num_pend_mbx_stage3, 0);
atomic_set(&ha->zio_threshold, DEFAULT_ZIO_THRESHOLD);
ha->last_zio_threshold = DEFAULT_ZIO_THRESHOLD;
INIT_LIST_HEAD(&ha->tmf_pending);