diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index c7757c78d0b1..d93ef9f1c45c 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -921,18 +921,21 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) complete_job(hdev, job); } -void hl_cs_rollback_all(struct hl_device *hdev) +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) { int i; struct hl_cs *cs, *tmp; - flush_workqueue(hdev->ts_free_obj_wq); + if (!skip_wq_flush) { + flush_workqueue(hdev->ts_free_obj_wq); - /* flush all completions before iterating over the CS mirror list in - * order to avoid a race with the release functions - */ - for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) - flush_workqueue(hdev->cq_wq[i]); + /* flush all completions before iterating over the CS mirror list in + * order to avoid a race with the release functions + */ + for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) + flush_workqueue(hdev->cq_wq[i]); + + } /* Make sure we don't have leftovers in the CS mirror list */ list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 8ea9dfe3f79b..d52381d1fbd2 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -685,7 +685,8 @@ static void take_release_locks(struct hl_device *hdev) mutex_unlock(&hdev->fpriv_ctrl_list_lock); } -static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset) +static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, + bool skip_wq_flush) { if (hard_reset) device_late_fini(hdev); @@ -698,7 +699,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); /* Go over all the queues, release all CS and their jobs */ - hl_cs_rollback_all(hdev); + hl_cs_rollback_all(hdev, skip_wq_flush); /* Release all pending user interrupts, each pending user interrupt * holds a reference to user context @@ -978,7 +979,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags) int hl_device_reset(struct hl_device *hdev, u32 flags) { bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false, - reset_upon_device_release = false, schedule_hard_reset = false; + reset_upon_device_release = false, schedule_hard_reset = false, + skip_wq_flush = false; u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; struct hl_ctx *ctx; int i, rc; @@ -991,6 +993,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags) hard_reset = !!(flags & HL_DRV_RESET_HARD); from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); + skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE); if (!hard_reset && !hdev->asic_prop.supports_soft_reset) { hard_instead_soft = true; @@ -1076,7 +1079,7 @@ again: return 0; } - cleanup_resources(hdev, hard_reset, fw_reset); + cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush); kill_processes: if (hard_reset) { @@ -1686,7 +1689,7 @@ void hl_device_fini(struct hl_device *hdev) hl_hwmon_fini(hdev); - cleanup_resources(hdev, true, false); + cleanup_resources(hdev, true, false, false); /* Kill processes here after CS rollback. This is because the process * can't really exit until all its CSs are done, which is what we diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 677ae4ff922c..cef4717d0916 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3054,7 +3054,7 @@ int hl_cb_pool_fini(struct hl_device *hdev); int hl_cb_va_pool_init(struct hl_ctx *ctx); void hl_cb_va_pool_fini(struct hl_ctx *ctx); -void hl_cs_rollback_all(struct hl_device *hdev); +void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush); struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, enum hl_queue_type queue_type, bool is_kernel_allocated_cb); void hl_sob_reset_error(struct kref *ref);