habanalabs: rename reset flags
Rename reset flags for better readability as compared to HL_RESET_CAUSE* enum shared with the f/w. Signed-off-by: Bharat Jauhari <bjauhari@habana.ai> Reviewed-by: Oded Gabbay <ogabbay@kernel.org> Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
parent
e84e31a912
commit
234caa5273
@ -767,7 +767,7 @@ static void cs_timedout(struct work_struct *work)
|
||||
|
||||
if (likely(!skip_reset_on_timeout)) {
|
||||
if (hdev->reset_on_lockup)
|
||||
hl_device_reset(hdev, HL_RESET_TDR);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_TDR);
|
||||
else
|
||||
hdev->needs_reset = true;
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ static void hpriv_release(struct kref *ref)
|
||||
|
||||
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|
||||
|| hdev->reset_upon_device_release)
|
||||
hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
|
||||
|
||||
/* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
|
||||
* thread, we don't care because the in_reset is marked so if a user will try to open
|
||||
@ -330,10 +330,10 @@ static void device_hard_reset_pending(struct work_struct *work)
|
||||
u32 flags;
|
||||
int rc;
|
||||
|
||||
flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
|
||||
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_RESET_THR;
|
||||
|
||||
if (device_reset_work->fw_reset)
|
||||
flags |= HL_RESET_FW;
|
||||
flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
|
||||
|
||||
rc = hl_device_reset(hdev, flags);
|
||||
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
|
||||
@ -541,7 +541,7 @@ static void hl_device_heartbeat(struct work_struct *work)
|
||||
goto reschedule;
|
||||
|
||||
dev_err(hdev->dev, "Device heartbeat failed!\n");
|
||||
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
|
||||
|
||||
return;
|
||||
|
||||
@ -552,7 +552,7 @@ reschedule:
|
||||
* If control reached here, then at least one heartbeat work has been
|
||||
* scheduled since last reset/init cycle.
|
||||
* So if the device is not already in reset cycle, reset the flag
|
||||
* prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
|
||||
* prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
|
||||
* status for at least one heartbeat. From this point driver restarts
|
||||
* tracking future consecutive fatal errors.
|
||||
*/
|
||||
@ -831,7 +831,7 @@ int hl_device_resume(struct hl_device *hdev)
|
||||
hdev->disabled = false;
|
||||
atomic_set(&hdev->in_reset, 0);
|
||||
|
||||
rc = hl_device_reset(hdev, HL_RESET_HARD);
|
||||
rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
if (rc) {
|
||||
dev_err(hdev->dev, "Failed to reset device during resume\n");
|
||||
goto disable_device;
|
||||
@ -948,15 +948,15 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
|
||||
* ('in_reset' makes sure of it). This makes sure that
|
||||
* 'reset_cause' will continue holding its 1st recorded reason!
|
||||
*/
|
||||
if (flags & HL_RESET_HEARTBEAT) {
|
||||
if (flags & HL_DRV_RESET_HEARTBEAT) {
|
||||
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
|
||||
cur_reset_trigger = HL_RESET_HEARTBEAT;
|
||||
} else if (flags & HL_RESET_TDR) {
|
||||
cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
|
||||
} else if (flags & HL_DRV_RESET_TDR) {
|
||||
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
|
||||
cur_reset_trigger = HL_RESET_TDR;
|
||||
} else if (flags & HL_RESET_FW_FATAL_ERR) {
|
||||
cur_reset_trigger = HL_DRV_RESET_TDR;
|
||||
} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
|
||||
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
|
||||
cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
|
||||
cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
|
||||
} else {
|
||||
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
|
||||
}
|
||||
@ -979,8 +979,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
|
||||
* If F/W is performing the reset, no need to send it a message to disable
|
||||
* PCI access
|
||||
*/
|
||||
if ((flags & HL_RESET_HARD) &&
|
||||
!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
|
||||
if ((flags & HL_DRV_RESET_HARD) &&
|
||||
!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
|
||||
/* Disable PCI access from device F/W so he won't send
|
||||
* us additional interrupts. We disable MSI/MSI-X at
|
||||
* the halt_engines function and we can't have the F/W
|
||||
@ -1025,9 +1025,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
|
||||
return 0;
|
||||
}
|
||||
|
||||
hard_reset = !!(flags & HL_RESET_HARD);
|
||||
from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
|
||||
fw_reset = !!(flags & HL_RESET_FW);
|
||||
hard_reset = !!(flags & HL_DRV_RESET_HARD);
|
||||
from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
|
||||
fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
|
||||
|
||||
if (!hard_reset && !hdev->supports_soft_reset) {
|
||||
hard_instead_soft = true;
|
||||
@ -1035,7 +1035,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
|
||||
}
|
||||
|
||||
if (hdev->reset_upon_device_release &&
|
||||
(flags & HL_RESET_DEVICE_RELEASE)) {
|
||||
(flags & HL_DRV_RESET_DEV_RELEASE)) {
|
||||
dev_dbg(hdev->dev,
|
||||
"Perform %s-reset upon device release\n",
|
||||
hard_reset ? "hard" : "soft");
|
||||
@ -1075,7 +1075,7 @@ do_reset:
|
||||
|
||||
if (hard_reset)
|
||||
dev_info(hdev->dev, "Going to reset device\n");
|
||||
else if (flags & HL_RESET_DEVICE_RELEASE)
|
||||
else if (flags & HL_DRV_RESET_DEV_RELEASE)
|
||||
dev_info(hdev->dev,
|
||||
"Going to reset device after it was released by user\n");
|
||||
else
|
||||
@ -1171,7 +1171,7 @@ kill_processes:
|
||||
hdev->hard_reset_pending = false;
|
||||
|
||||
if (hdev->reset_trigger_repeated &&
|
||||
(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
|
||||
(hdev->prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR)) {
|
||||
/* if there 2 back to back resets from FW,
|
||||
* ensure driver puts the driver in a unusable state
|
||||
*/
|
||||
|
@ -120,37 +120,37 @@ enum hl_mmu_page_table_location {
|
||||
/*
|
||||
* Reset Flags
|
||||
*
|
||||
* - HL_RESET_HARD
|
||||
* - HL_DRV_RESET_HARD
|
||||
* If set do hard reset to all engines. If not set reset just
|
||||
* compute/DMA engines.
|
||||
*
|
||||
* - HL_RESET_FROM_RESET_THREAD
|
||||
* - HL_DRV_RESET_FROM_RESET_THR
|
||||
* Set if the caller is the hard-reset thread
|
||||
*
|
||||
* - HL_RESET_HEARTBEAT
|
||||
* - HL_DRV_RESET_HEARTBEAT
|
||||
* Set if reset is due to heartbeat
|
||||
*
|
||||
* - HL_RESET_TDR
|
||||
* - HL_DRV_RESET_TDR
|
||||
* Set if reset is due to TDR
|
||||
*
|
||||
* - HL_RESET_DEVICE_RELEASE
|
||||
* - HL_DRV_RESET_DEV_RELEASE
|
||||
* Set if reset is due to device release
|
||||
*
|
||||
* - HL_RESET_FW
|
||||
* - HL_DRV_RESET_BYPASS_REQ_TO_FW
|
||||
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
|
||||
* only when running with secured f/w
|
||||
*
|
||||
* - HL_RESET_FW_FATAL_ERR
|
||||
* - HL_DRV_RESET_FW_FATAL_ERR
|
||||
* Set if reset is due to a fatal error from FW
|
||||
*/
|
||||
|
||||
#define HL_RESET_HARD (1 << 0)
|
||||
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
|
||||
#define HL_RESET_HEARTBEAT (1 << 2)
|
||||
#define HL_RESET_TDR (1 << 3)
|
||||
#define HL_RESET_DEVICE_RELEASE (1 << 4)
|
||||
#define HL_RESET_FW (1 << 5)
|
||||
#define HL_RESET_FW_FATAL_ERR (1 << 6)
|
||||
#define HL_DRV_RESET_HARD (1 << 0)
|
||||
#define HL_DRV_RESET_FROM_RESET_THR (1 << 1)
|
||||
#define HL_DRV_RESET_HEARTBEAT (1 << 2)
|
||||
#define HL_DRV_RESET_TDR (1 << 3)
|
||||
#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
|
||||
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
|
||||
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
|
||||
|
||||
#define HL_MAX_SOBS_PER_MONITOR 8
|
||||
|
||||
|
@ -316,7 +316,7 @@ static int free_phys_pg_pack(struct hl_device *hdev,
|
||||
}
|
||||
|
||||
if (rc && !hdev->disabled)
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
|
||||
end:
|
||||
kvfree(phys_pg_pack->pages);
|
||||
|
@ -236,7 +236,7 @@ static ssize_t hard_reset_store(struct device *dev,
|
||||
|
||||
dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
|
||||
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
|
||||
out:
|
||||
return count;
|
||||
|
@ -8003,7 +8003,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
|
||||
gaudi_print_irq_info(hdev, event_type, true);
|
||||
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
|
||||
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
|
||||
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
|
||||
goto reset_device;
|
||||
|
||||
case GAUDI_EVENT_GIC500:
|
||||
@ -8011,7 +8011,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
case GAUDI_EVENT_L2_RAM_ECC:
|
||||
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
|
||||
gaudi_print_irq_info(hdev, event_type, false);
|
||||
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
|
||||
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
|
||||
goto reset_device;
|
||||
|
||||
case GAUDI_EVENT_HBM0_SPI_0:
|
||||
@ -8022,7 +8022,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
gaudi_hbm_read_interrupts(hdev,
|
||||
gaudi_hbm_event_to_dev(event_type),
|
||||
&eq_entry->hbm_ecc_data);
|
||||
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
|
||||
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
|
||||
goto reset_device;
|
||||
|
||||
case GAUDI_EVENT_HBM0_SPI_1:
|
||||
@ -8205,9 +8205,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
|
||||
|
||||
reset_device:
|
||||
if (hdev->asic_prop.fw_security_enabled)
|
||||
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD
|
||||
| HL_DRV_RESET_BYPASS_REQ_TO_FW
|
||||
| fw_fatal_err_flag);
|
||||
else if (hdev->hard_reset_on_fw_events)
|
||||
hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
|
||||
else
|
||||
hl_fw_unmask_irq(hdev, event_type);
|
||||
}
|
||||
@ -8260,7 +8262,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
|
||||
if (rc) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"MMU cache invalidation timeout\n");
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
@ -4838,14 +4838,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
|
||||
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
|
||||
goya_print_irq_info(hdev, event_type, false);
|
||||
if (hdev->hard_reset_on_fw_events)
|
||||
hl_device_reset(hdev, (HL_RESET_HARD |
|
||||
HL_RESET_FW_FATAL_ERR));
|
||||
hl_device_reset(hdev, (HL_DRV_RESET_HARD |
|
||||
HL_DRV_RESET_FW_FATAL_ERR));
|
||||
break;
|
||||
|
||||
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
|
||||
goya_print_irq_info(hdev, event_type, false);
|
||||
if (hdev->hard_reset_on_fw_events)
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
break;
|
||||
|
||||
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
|
||||
@ -4905,7 +4905,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
|
||||
goya_print_irq_info(hdev, event_type, false);
|
||||
goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
|
||||
if (hdev->hard_reset_on_fw_events)
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
else
|
||||
hl_fw_unmask_irq(hdev, event_type);
|
||||
break;
|
||||
@ -5239,7 +5239,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
|
||||
if (rc) {
|
||||
dev_err_ratelimited(hdev->dev,
|
||||
"MMU cache invalidation timeout\n");
|
||||
hl_device_reset(hdev, HL_RESET_HARD);
|
||||
hl_device_reset(hdev, HL_DRV_RESET_HARD);
|
||||
}
|
||||
|
||||
return rc;
|
||||
|
Loading…
Reference in New Issue
Block a user