drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2

Protect the struct amdgpu_bo_list with a mutex. This is used during command
submission in order to avoid buffer object corruption as recorded in
the link below.

v2 (chk): Keep the mutex looked for the whole CS to avoid using the
	  list from multiple CS threads at the same time.

Suggested-by: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <Alexander.Deucher@amd.com>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Cc: Vitaly Prosyak <Vitaly.Prosyak@amd.com>
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2048
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
This commit is contained in:
Luben Tuikov 2022-07-20 15:04:18 -04:00 committed by Alex Deucher
parent e1aadbab44
commit 90af0ca047
3 changed files with 19 additions and 4 deletions

View File

@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
mutex_init(&list->bo_list_mutex);
*result = list;
return 0;

View File

@ -47,6 +47,10 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
/* Protect access during command submission.
*/
struct mutex bo_list_mutex;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,

View File

@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
mutex_lock(&p->bo_list->bo_list_mutex);
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
@ -651,6 +653,7 @@ out_free_user_pages:
kvfree(e->user_pages);
e->user_pages = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
}
return r;
}
@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
if (error && backoff)
if (error && backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
mutex_unlock(&parser->bo_list->bo_list_mutex);
}
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
}
r = amdgpu_vm_handle_moved(adev, vm);
@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;