drm/amdgpu: stop all rings before doing gpu recover
Found that recover_vram_from_shadow sometimes gets executed in parallel with the SDMA scheduler; all schedulers should be stopped before doing GPU reset/recover. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Tested-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
d869ae092e
commit
711826656b
@ -2648,22 +2648,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
|
||||
/* block TTM */
|
||||
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
|
||||
|
||||
/* store modesetting */
|
||||
if (amdgpu_device_has_dc_support(adev))
|
||||
state = drm_atomic_helper_suspend(adev->ddev);
|
||||
|
||||
/* block scheduler */
|
||||
/* block all schedulers and reset given job's ring */
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
/* only focus on the ring hit timeout if &job not NULL */
|
||||
kthread_park(ring->sched.thread);
|
||||
|
||||
if (job && job->ring->idx != i)
|
||||
continue;
|
||||
|
||||
kthread_park(ring->sched.thread);
|
||||
drm_sched_hw_job_reset(&ring->sched, &job->base);
|
||||
|
||||
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
|
||||
@ -2706,33 +2707,22 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
|
||||
}
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
/* only focus on the ring hit timeout if &job not NULL */
|
||||
if (job && job->ring->idx != i)
|
||||
continue;
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
/* only need recovery sched of the given job's ring
|
||||
* or all rings (in the case @job is NULL)
|
||||
* after above amdgpu_reset accomplished
|
||||
*/
|
||||
if ((!job || job->ring->idx == i) && !r)
|
||||
drm_sched_job_recovery(&ring->sched);
|
||||
kthread_unpark(ring->sched.thread);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_ring *ring = adev->rings[i];
|
||||
|
||||
if (!ring || !ring->sched.thread)
|
||||
continue;
|
||||
|
||||
/* only focus on the ring hit timeout if &job not NULL */
|
||||
if (job && job->ring->idx != i)
|
||||
continue;
|
||||
|
||||
kthread_unpark(adev->rings[i]->sched.thread);
|
||||
}
|
||||
kthread_unpark(ring->sched.thread);
|
||||
}
|
||||
|
||||
if (amdgpu_device_has_dc_support(adev)) {
|
||||
|
Loading…
Reference in New Issue
Block a user