drm/amdgpu: use job* to replace voluntary
That way we can know which job caused the hang and can do a per-scheduler reset/recovery instead of resetting all schedulers. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
4fbf87e2fe
commit
7225f8736c
@ -2609,14 +2609,13 @@ err:
|
||||
* amdgpu_sriov_gpu_reset - reset the asic
|
||||
*
|
||||
* @adev: amdgpu device pointer
|
||||
* @voluntary: if this reset is requested by guest.
|
||||
* (true means by guest and false means by HYPERVISOR )
|
||||
* @job: the job that triggered the hang (NULL when the reset is not requested for a specific job)
|
||||
*
|
||||
* Attempt to reset the GPU if it has hung (all asics).
|
||||
* for SRIOV case.
|
||||
* Returns 0 for success or an error on failure.
|
||||
*/
|
||||
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
|
||||
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
|
||||
{
|
||||
int i, r = 0;
|
||||
int resched;
|
||||
@ -2646,7 +2645,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
|
||||
amdgpu_fence_driver_force_completion(adev);
|
||||
|
||||
/* request to take full control of GPU before re-initialization */
|
||||
if (voluntary)
|
||||
if (job)
|
||||
amdgpu_virt_reset_gpu(adev);
|
||||
else
|
||||
amdgpu_virt_request_full_gpu(adev, true);
|
||||
|
@ -38,7 +38,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job)
|
||||
job->ring->fence_drv.sync_seq);
|
||||
|
||||
if (amdgpu_sriov_vf(job->adev))
|
||||
amdgpu_sriov_gpu_reset(job->adev, true);
|
||||
amdgpu_sriov_gpu_reset(job->adev, job);
|
||||
else
|
||||
amdgpu_gpu_reset(job->adev);
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v);
|
||||
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
|
||||
int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
|
||||
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary);
|
||||
int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job);
|
||||
int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
|
||||
void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
|
||||
|
||||
|
@ -243,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* Trigger recovery due to world switch failure */
|
||||
amdgpu_sriov_gpu_reset(adev, false);
|
||||
amdgpu_sriov_gpu_reset(adev, NULL);
|
||||
}
|
||||
|
||||
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
|
||||
|
@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* Trigger recovery due to world switch failure */
|
||||
amdgpu_sriov_gpu_reset(adev, false);
|
||||
amdgpu_sriov_gpu_reset(adev, NULL);
|
||||
}
|
||||
|
||||
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
|
||||
|
Loading…
Reference in New Issue
Block a user