scsi: qla2xxx: Add heartbeat check
Use "no-op" mailbox command to check if the adapter firmware is still responsive.

Link: https://lore.kernel.org/r/20210619052427.6440-1-njavali@marvell.com
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Quinn Tran <qutran@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
parent
c43ddbf97f
commit
d94d8158e1
@ -3660,6 +3660,8 @@ struct qla_qpair {
|
||||
struct qla_tgt_counters tgt_counters;
|
||||
uint16_t cpuid;
|
||||
struct qla_fw_resources fwres ____cacheline_aligned;
|
||||
u32 cmd_cnt;
|
||||
u32 cmd_completion_cnt;
|
||||
};
|
||||
|
||||
/* Place holder for FW buffer parameters */
|
||||
@ -4616,6 +4618,7 @@ struct qla_hw_data {
|
||||
|
||||
struct qla_hw_data_stat stat;
|
||||
pci_error_state_t pci_error_state;
|
||||
u64 prev_cmd_cnt;
|
||||
};
|
||||
|
||||
struct active_regions {
|
||||
@ -4743,6 +4746,7 @@ typedef struct scsi_qla_host {
|
||||
#define SET_ZIO_THRESHOLD_NEEDED 32
|
||||
#define ISP_ABORT_TO_ROM 33
|
||||
#define VPORT_DELETE 34
|
||||
#define HEARTBEAT_CHK 38
|
||||
|
||||
#define PROCESS_PUREX_IOCB 63
|
||||
|
||||
|
@ -549,6 +549,7 @@ extern int qla2xxx_read_remote_register(scsi_qla_host_t *, uint32_t,
|
||||
uint32_t *);
|
||||
extern int qla2xxx_write_remote_register(scsi_qla_host_t *, uint32_t,
|
||||
uint32_t);
|
||||
void qla_no_op_mb(struct scsi_qla_host *vha);
|
||||
|
||||
/*
|
||||
* Global Function Prototypes in qla_isr.c source file.
|
||||
|
@ -6870,10 +6870,14 @@ qla2x00_abort_isp_cleanup(scsi_qla_host_t *vha)
|
||||
ha->flags.fw_init_done = 0;
|
||||
ha->chip_reset++;
|
||||
ha->base_qpair->chip_reset = ha->chip_reset;
|
||||
ha->base_qpair->cmd_cnt = ha->base_qpair->cmd_completion_cnt = 0;
|
||||
for (i = 0; i < ha->max_qpairs; i++) {
|
||||
if (ha->queue_pair_map[i])
|
||||
if (ha->queue_pair_map[i]) {
|
||||
ha->queue_pair_map[i]->chip_reset =
|
||||
ha->base_qpair->chip_reset;
|
||||
ha->queue_pair_map[i]->cmd_cnt =
|
||||
ha->queue_pair_map[i]->cmd_completion_cnt = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* purge MBox commands */
|
||||
|
@ -1710,6 +1710,7 @@ qla24xx_start_scsi(srb_t *sp)
|
||||
} else
|
||||
req->ring_ptr++;
|
||||
|
||||
sp->qpair->cmd_cnt++;
|
||||
sp->flags |= SRB_DMA_VALID;
|
||||
|
||||
/* Set chip new ring index. */
|
||||
@ -1912,6 +1913,7 @@ qla24xx_dif_start_scsi(srb_t *sp)
|
||||
} else
|
||||
req->ring_ptr++;
|
||||
|
||||
sp->qpair->cmd_cnt++;
|
||||
/* Set chip new ring index. */
|
||||
wrt_reg_dword(req->req_q_in, req->ring_index);
|
||||
|
||||
@ -2068,6 +2070,7 @@ qla2xxx_start_scsi_mq(srb_t *sp)
|
||||
} else
|
||||
req->ring_ptr++;
|
||||
|
||||
sp->qpair->cmd_cnt++;
|
||||
sp->flags |= SRB_DMA_VALID;
|
||||
|
||||
/* Set chip new ring index. */
|
||||
@ -2284,6 +2287,7 @@ qla2xxx_dif_start_scsi_mq(srb_t *sp)
|
||||
} else
|
||||
req->ring_ptr++;
|
||||
|
||||
sp->qpair->cmd_cnt++;
|
||||
/* Set chip new ring index. */
|
||||
wrt_reg_dword(req->req_q_in, req->ring_index);
|
||||
|
||||
|
@ -2322,6 +2322,8 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
|
||||
|
||||
if (unlikely(iocb->u.nvme.aen_op))
|
||||
atomic_dec(&sp->vha->hw->nvme_active_aen_cnt);
|
||||
else
|
||||
sp->qpair->cmd_completion_cnt++;
|
||||
|
||||
if (unlikely(comp_status != CS_COMPLETE))
|
||||
logit = 1;
|
||||
@ -2967,6 +2969,8 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
|
||||
return;
|
||||
}
|
||||
|
||||
sp->qpair->cmd_completion_cnt++;
|
||||
|
||||
/* Fast path completion. */
|
||||
if (comp_status == CS_COMPLETE && scsi_status == 0) {
|
||||
qla2x00_process_completed_request(vha, req, handle);
|
||||
|
@ -6939,3 +6939,30 @@ ql26xx_led_config(scsi_qla_host_t *vha, uint16_t options, uint16_t *led)
|
||||
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* qla_no_op_mb(): This MB is used to check if FW is still alive and
|
||||
* able to generate an interrupt. Otherwise, a timeout will trigger
|
||||
* FW dump + reset
|
||||
* @vha: host adapter pointer
|
||||
* Return: None
|
||||
*/
|
||||
void qla_no_op_mb(struct scsi_qla_host *vha)
|
||||
{
|
||||
mbx_cmd_t mc;
|
||||
mbx_cmd_t *mcp = &mc;
|
||||
int rval;
|
||||
|
||||
memset(&mc, 0, sizeof(mc));
|
||||
mcp->mb[0] = 0; // noop cmd= 0
|
||||
mcp->out_mb = MBX_0;
|
||||
mcp->in_mb = MBX_0;
|
||||
mcp->tov = 5;
|
||||
mcp->flags = 0;
|
||||
rval = qla2x00_mailbox_command(vha, mcp);
|
||||
|
||||
if (rval) {
|
||||
ql_dbg(ql_dbg_async, vha, 0x7071,
|
||||
"Failed %s %x\n", __func__, rval);
|
||||
}
|
||||
}
|
||||
|
@ -536,6 +536,10 @@ static inline int qla2x00_start_nvme_mq(srb_t *sp)
|
||||
req->ring_ptr++;
|
||||
}
|
||||
|
||||
/* ignore nvme async cmd due to long timeout */
|
||||
if (!nvme->u.nvme.aen_op)
|
||||
sp->qpair->cmd_cnt++;
|
||||
|
||||
/* Set chip new ring index. */
|
||||
wrt_reg_dword(req->req_q_in, req->ring_index);
|
||||
|
||||
|
@ -6969,6 +6969,17 @@ intr_on_check:
|
||||
qla2x00_lip_reset(base_vha);
|
||||
}
|
||||
|
||||
if (test_bit(HEARTBEAT_CHK, &base_vha->dpc_flags)) {
|
||||
/*
|
||||
* if there is a mb in progress then that's
|
||||
* enough of a check to see if fw is still ticking.
|
||||
*/
|
||||
if (!ha->flags.mbox_busy && base_vha->flags.init_done)
|
||||
qla_no_op_mb(base_vha);
|
||||
|
||||
clear_bit(HEARTBEAT_CHK, &base_vha->dpc_flags);
|
||||
}
|
||||
|
||||
ha->dpc_active = 0;
|
||||
end_loop:
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
@ -7025,6 +7036,61 @@ qla2x00_rst_aen(scsi_qla_host_t *vha)
|
||||
}
|
||||
}
|
||||
|
||||
static bool qla_do_heartbeat(struct scsi_qla_host *vha)
|
||||
{
|
||||
u64 cmd_cnt, prev_cmd_cnt;
|
||||
bool do_hb = false;
|
||||
struct qla_hw_data *ha = vha->hw;
|
||||
int i;
|
||||
|
||||
/* if cmds are still pending down in fw, then do hb */
|
||||
if (ha->base_qpair->cmd_cnt != ha->base_qpair->cmd_completion_cnt) {
|
||||
do_hb = true;
|
||||
goto skip;
|
||||
}
|
||||
|
||||
for (i = 0; i < ha->max_qpairs; i++) {
|
||||
if (ha->queue_pair_map[i] &&
|
||||
ha->queue_pair_map[i]->cmd_cnt !=
|
||||
ha->queue_pair_map[i]->cmd_completion_cnt) {
|
||||
do_hb = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
skip:
|
||||
prev_cmd_cnt = ha->prev_cmd_cnt;
|
||||
cmd_cnt = ha->base_qpair->cmd_cnt;
|
||||
for (i = 0; i < ha->max_qpairs; i++) {
|
||||
if (ha->queue_pair_map[i])
|
||||
cmd_cnt += ha->queue_pair_map[i]->cmd_cnt;
|
||||
}
|
||||
ha->prev_cmd_cnt = cmd_cnt;
|
||||
|
||||
if (!do_hb && ((cmd_cnt - prev_cmd_cnt) > 50))
|
||||
/*
|
||||
* IOs are completing before periodic hb check.
|
||||
* IOs seems to be running, do hb for sanity check.
|
||||
*/
|
||||
do_hb = true;
|
||||
|
||||
return do_hb;
|
||||
}
|
||||
|
||||
static void qla_heart_beat(struct scsi_qla_host *vha)
|
||||
{
|
||||
if (vha->vp_idx)
|
||||
return;
|
||||
|
||||
if (vha->hw->flags.eeh_busy || qla2x00_chip_is_down(vha))
|
||||
return;
|
||||
|
||||
if (qla_do_heartbeat(vha)) {
|
||||
set_bit(HEARTBEAT_CHK, &vha->dpc_flags);
|
||||
qla2xxx_wake_dpc(vha);
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
* qla2x00_timer
|
||||
*
|
||||
@ -7243,6 +7309,8 @@ qla2x00_timer(struct timer_list *t)
|
||||
qla2xxx_wake_dpc(vha);
|
||||
}
|
||||
|
||||
qla_heart_beat(vha);
|
||||
|
||||
qla2x00_restart_timer(vha, WATCH_INTERVAL);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user