habanalabs: rename reset flags

Rename reset flags for better readability as compared to
HL_RESET_CAUSE* enum shared with the f/w.

Signed-off-by: Bharat Jauhari <bjauhari@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Bharat Jauhari 2021-09-16 14:00:38 +03:00 committed by Oded Gabbay
parent e84e31a912
commit 234caa5273
7 changed files with 50 additions and 48 deletions

View File

@ -767,7 +767,7 @@ static void cs_timedout(struct work_struct *work)
if (likely(!skip_reset_on_timeout)) {
if (hdev->reset_on_lockup)
hl_device_reset(hdev, HL_RESET_TDR);
hl_device_reset(hdev, HL_DRV_RESET_TDR);
else
hdev->needs_reset = true;
}

View File

@ -95,7 +95,7 @@ static void hpriv_release(struct kref *ref)
if ((hdev->reset_if_device_not_idle && !device_is_idle)
|| hdev->reset_upon_device_release)
hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
/* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
* thread, we don't care because the in_reset is marked so if a user will try to open
@ -330,10 +330,10 @@ static void device_hard_reset_pending(struct work_struct *work)
u32 flags;
int rc;
flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
flags = HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_RESET_THR;
if (device_reset_work->fw_reset)
flags |= HL_RESET_FW;
flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
rc = hl_device_reset(hdev, flags);
if ((rc == -EBUSY) && !hdev->device_fini_pending) {
@ -541,7 +541,7 @@ static void hl_device_heartbeat(struct work_struct *work)
goto reschedule;
dev_err(hdev->dev, "Device heartbeat failed!\n");
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);
hl_device_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT);
return;
@ -552,7 +552,7 @@ reschedule:
* If control reached here, then at least one heartbeat work has been
* scheduled since last reset/init cycle.
* So if the device is not already in reset cycle, reset the flag
* prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
* prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR
* status for at least one heartbeat. From this point driver restarts
* tracking future consecutive fatal errors.
*/
@ -831,7 +831,7 @@ int hl_device_resume(struct hl_device *hdev)
hdev->disabled = false;
atomic_set(&hdev->in_reset, 0);
rc = hl_device_reset(hdev, HL_RESET_HARD);
rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
if (rc) {
dev_err(hdev->dev, "Failed to reset device during resume\n");
goto disable_device;
@ -948,15 +948,15 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
* ('in_reset' makes sure of it). This makes sure that
* 'reset_cause' will continue holding its 1st recorded reason!
*/
if (flags & HL_RESET_HEARTBEAT) {
if (flags & HL_DRV_RESET_HEARTBEAT) {
hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
cur_reset_trigger = HL_RESET_HEARTBEAT;
} else if (flags & HL_RESET_TDR) {
cur_reset_trigger = HL_DRV_RESET_HEARTBEAT;
} else if (flags & HL_DRV_RESET_TDR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
cur_reset_trigger = HL_RESET_TDR;
} else if (flags & HL_RESET_FW_FATAL_ERR) {
cur_reset_trigger = HL_DRV_RESET_TDR;
} else if (flags & HL_DRV_RESET_FW_FATAL_ERR) {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR;
} else {
hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
}
@ -979,8 +979,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
* If F/W is performing the reset, no need to send it a message to disable
* PCI access
*/
if ((flags & HL_RESET_HARD) &&
!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
if ((flags & HL_DRV_RESET_HARD) &&
!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
/* Disable PCI access from device F/W so he won't send
* us additional interrupts. We disable MSI/MSI-X at
* the halt_engines function and we can't have the F/W
@ -1025,9 +1025,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
return 0;
}
hard_reset = !!(flags & HL_RESET_HARD);
from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
fw_reset = !!(flags & HL_RESET_FW);
hard_reset = !!(flags & HL_DRV_RESET_HARD);
from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
if (!hard_reset && !hdev->supports_soft_reset) {
hard_instead_soft = true;
@ -1035,7 +1035,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
}
if (hdev->reset_upon_device_release &&
(flags & HL_RESET_DEVICE_RELEASE)) {
(flags & HL_DRV_RESET_DEV_RELEASE)) {
dev_dbg(hdev->dev,
"Perform %s-reset upon device release\n",
hard_reset ? "hard" : "soft");
@ -1075,7 +1075,7 @@ do_reset:
if (hard_reset)
dev_info(hdev->dev, "Going to reset device\n");
else if (flags & HL_RESET_DEVICE_RELEASE)
else if (flags & HL_DRV_RESET_DEV_RELEASE)
dev_info(hdev->dev,
"Going to reset device after it was released by user\n");
else
@ -1171,7 +1171,7 @@ kill_processes:
hdev->hard_reset_pending = false;
if (hdev->reset_trigger_repeated &&
(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
(hdev->prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR)) {
/* if there 2 back to back resets from FW,
* ensure driver puts the driver in a unusable state
*/

View File

@ -120,37 +120,37 @@ enum hl_mmu_page_table_location {
/*
* Reset Flags
*
* - HL_RESET_HARD
* - HL_DRV_RESET_HARD
* If set do hard reset to all engines. If not set reset just
* compute/DMA engines.
*
* - HL_RESET_FROM_RESET_THREAD
* - HL_DRV_RESET_FROM_RESET_THR
* Set if the caller is the hard-reset thread
*
* - HL_RESET_HEARTBEAT
* - HL_DRV_RESET_HEARTBEAT
* Set if reset is due to heartbeat
*
* - HL_RESET_TDR
* - HL_DRV_RESET_TDR
* Set if reset is due to TDR
*
* - HL_RESET_DEVICE_RELEASE
* - HL_DRV_RESET_DEV_RELEASE
* Set if reset is due to device release
*
* - HL_RESET_FW
* - HL_DRV_RESET_BYPASS_REQ_TO_FW
* F/W will perform the reset. No need to ask it to reset the device. This is relevant
* only when running with secured f/w
*
* - HL_RESET_FW_FATAL_ERR
* - HL_DRV_RESET_FW_FATAL_ERR
* Set if reset is due to a fatal error from FW
*/
#define HL_RESET_HARD (1 << 0)
#define HL_RESET_FROM_RESET_THREAD (1 << 1)
#define HL_RESET_HEARTBEAT (1 << 2)
#define HL_RESET_TDR (1 << 3)
#define HL_RESET_DEVICE_RELEASE (1 << 4)
#define HL_RESET_FW (1 << 5)
#define HL_RESET_FW_FATAL_ERR (1 << 6)
#define HL_DRV_RESET_HARD (1 << 0)
#define HL_DRV_RESET_FROM_RESET_THR (1 << 1)
#define HL_DRV_RESET_HEARTBEAT (1 << 2)
#define HL_DRV_RESET_TDR (1 << 3)
#define HL_DRV_RESET_DEV_RELEASE (1 << 4)
#define HL_DRV_RESET_BYPASS_REQ_TO_FW (1 << 5)
#define HL_DRV_RESET_FW_FATAL_ERR (1 << 6)
#define HL_MAX_SOBS_PER_MONITOR 8

View File

@ -316,7 +316,7 @@ static int free_phys_pg_pack(struct hl_device *hdev,
}
if (rc && !hdev->disabled)
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
end:
kvfree(phys_pg_pack->pages);

View File

@ -236,7 +236,7 @@ static ssize_t hard_reset_store(struct device *dev,
dev_warn(hdev->dev, "Hard-Reset requested through sysfs\n");
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
out:
return count;

View File

@ -8003,7 +8003,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_GIC500:
@ -8011,7 +8011,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
@ -8022,7 +8022,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
@ -8205,9 +8205,11 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
reset_device:
if (hdev->asic_prop.fw_security_enabled)
hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
hl_device_reset(hdev, HL_DRV_RESET_HARD
| HL_DRV_RESET_BYPASS_REQ_TO_FW
| fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
hl_device_reset(hdev, HL_DRV_RESET_HARD | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
@ -8260,7 +8262,7 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
return rc;

View File

@ -4838,14 +4838,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, (HL_RESET_HARD |
HL_RESET_FW_FATAL_ERR));
hl_device_reset(hdev, (HL_DRV_RESET_HARD |
HL_DRV_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
break;
case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
@ -4905,7 +4905,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
goya_print_irq_info(hdev, event_type, false);
goya_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
else
hl_fw_unmask_irq(hdev, event_type);
break;
@ -5239,7 +5239,7 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
if (rc) {
dev_err_ratelimited(hdev->dev,
"MMU cache invalidation timeout\n");
hl_device_reset(hdev, HL_RESET_HARD);
hl_device_reset(hdev, HL_DRV_RESET_HARD);
}
return rc;