drm/amdgpu: sync to KFD fences before clearing PTEs

This fixes a basic problem: we also need to sync to the KFD fences of
the BO, because otherwise we can end up clearing PTEs while the KFD
queues are still running.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 126be9b2be (parent 4771d2ecb7)
Author:    Christian König <christian.koenig@amd.com>
Date:      2024-08-21 13:55:41 +02:00
Committer: Alex Deucher <alexander.deucher@amd.com>

3 changed files with 37 additions and 0 deletions
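Background for the fix (not part of the patch): KFD eviction fences sit in
the BO's reservation object under DMA_RESV_USAGE_BOOKKEEP, the usage class
that implicit command-submission sync deliberately skips. A minimal sketch
of that skip, using the dma_resv iterator API; the wrapper function name is
made up for illustration:

#include <linux/dma-resv.h>

/* Sketch: the walk implicit CS sync performs. The iterator returns
 * fences up to the given usage class, and DMA_RESV_USAGE_BOOKKEEP
 * ranks beyond dma_resv_usage_rw(true) == DMA_RESV_USAGE_READ, so
 * KFD eviction fences never show up here. The locked iterator
 * requires the resv lock to be held.
 */
static void sketch_implicit_sync_walk(struct dma_resv *resv)
{
        struct dma_resv_iter cursor;
        struct dma_fence *fence;

        dma_resv_for_each_fence(&cursor, resv, dma_resv_usage_rw(true),
                                fence) {
                /* collect/wait; BOOKKEEP fences are skipped, which is
                 * why clearing PTEs could race with running KFD queues.
                 */
        }
}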

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c

@@ -260,6 +260,36 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
        return 0;
}

/**
 * amdgpu_sync_kfd - sync to KFD fences
 *
 * @sync: sync object to add KFD fences to
 * @resv: reservation object with KFD fences
 *
 * Extract all KFD fences and add them to the sync object.
 */
int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
{
        struct dma_resv_iter cursor;
        struct dma_fence *f;
        int r = 0;

        dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
        dma_resv_for_each_fence_unlocked(&cursor, f) {
                void *fence_owner = amdgpu_sync_get_owner(f);

                if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
                        continue;

                r = amdgpu_sync_fence(sync, f);
                if (r)
                        break;
        }
        dma_resv_iter_end(&cursor);

        return r;
}

/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{
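For illustration, here is how a hypothetical caller (not from this patch)
could consume the new helper via the usual amdgpu_sync create/wait/free
lifecycle; the real PTE-clearing path hands the collected fences to the
page-table update job instead of doing a CPU wait:

/* Hypothetical caller: gather a BO's KFD fences and wait on the CPU. */
static int sketch_wait_kfd_fences(struct amdgpu_bo *bo)
{
        struct amdgpu_sync sync;
        int r;

        amdgpu_sync_create(&sync);
        r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
        if (!r)
                r = amdgpu_sync_wait(&sync, true);      /* interruptible */
        amdgpu_sync_free(&sync);
        return r;
}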

drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h

@@ -51,6 +51,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                     struct dma_resv *resv, enum amdgpu_sync_mode mode,
                     void *owner);
int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
                                         struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);

drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

@@ -1169,6 +1169,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
                                     AMDGPU_SYNC_EQ_OWNER, vm);
                if (r)
                        goto error_free;

                if (bo) {
                        r = amdgpu_sync_kfd(&sync, bo->tbo.base.resv);
                        if (r)
                                goto error_free;
                }
        } else {
                struct drm_gem_object *obj = &bo->tbo.base;
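For completeness, a sketch of the producer side under the same assumptions
(function name and locking context are illustrative): a fence published
with DMA_RESV_USAGE_BOOKKEEP stays invisible to implicit sync, and
amdgpu_sync_kfd() then picks out only those whose owner is
AMDGPU_FENCE_OWNER_KFD:

/* Illustrative only: publish a fence in the BOOKKEEP slot. Implicit
 * sync ignores it; explicit walks like amdgpu_sync_kfd() above (which
 * additionally filters by fence owner) are the only ones that see it.
 */
static int sketch_add_bookkeep_fence(struct dma_resv *resv,
                                     struct dma_fence *fence)
{
        int r;

        r = dma_resv_lock(resv, NULL);
        if (r)
                return r;
        r = dma_resv_reserve_fences(resv, 1);
        if (!r)
                dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_BOOKKEEP);
        dma_resv_unlock(resv);
        return r;
}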