From d4e8ad908b2095388a300b19d76b78c170052a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 8 Sep 2022 08:43:15 +0200 Subject: [PATCH] drm/amdgpu: reorder CS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sort the functions in the order they are called Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 674 +++++++++++++------------ 1 file changed, 338 insertions(+), 336 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ee02fecc27b7..e104d7ef3c3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -278,6 +278,323 @@ free_chunk: return ret; } +static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, + struct amdgpu_cs_parser *parser) +{ + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + int r, ce_preempt = 0, de_preempt = 0; + struct amdgpu_ring *ring; + int i, j; + + for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct drm_sched_entity *entity; + + chunk = &parser->chunks[i]; + ib = &parser->job->ibs[j]; + chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && + chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { + if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) + ce_preempt++; + else + de_preempt++; + + /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ + if (ce_preempt > 1 || de_preempt > 1) + return -EINVAL; + } + + r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, + &entity); + if (r) + return r; + + if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + parser->job->preamble_status |= + AMDGPU_PREAMBLE_IB_PRESENT; + + if (parser->entity && parser->entity != entity) + return -EINVAL; + + /* Return if there is no run queue associated with this entity. + * Possibly because of disabled HW IP*/ + if (entity->rq == NULL) + return -EINVAL; + + parser->entity = entity; + + ring = to_amdgpu_ring(entity->rq->sched); + r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; + } + + ib->gpu_addr = chunk_ib->va_start; + ib->length_dw = chunk_ib->ib_bytes / 4; + ib->flags = chunk_ib->flags; + + j++; + } + + /* MM engine doesn't support user fences */ + ring = to_amdgpu_ring(parser->entity->rq->sched); + if (parser->job->uf_addr && ring->funcs->no_user_fence) + return -EINVAL; + + return 0; +} + +static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + unsigned num_deps; + int i, r; + struct drm_amdgpu_cs_chunk_dep *deps; + + deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_dep); + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; + struct dma_fence *fence; + + ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); + if (ctx == NULL) + return -EINVAL; + + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); + amdgpu_ctx_put(ctx); + + if (IS_ERR(fence)) + return PTR_ERR(fence); + else if (!fence) + continue; + + if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { + struct drm_sched_fence *s_fence; + struct dma_fence *old = fence; + + s_fence = to_drm_sched_fence(fence); + fence = dma_fence_get(&s_fence->scheduled); + dma_fence_put(old); + } + + r = amdgpu_sync_fence(&p->job->sync, fence); + dma_fence_put(fence); + if (r) + return r; + } + return 0; +} + +static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, + uint32_t handle, u64 point, + u64 flags) +{ + struct dma_fence *fence; + int r; + + r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); + if (r) { + DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", + handle, point, r); + return r; + } + + r = amdgpu_sync_fence(&p->job->sync, fence); + dma_fence_put(fence); + + return r; +} + +static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i, r; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, + 0, 0); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i, r; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + for (i = 0; i < num_deps; ++i) { + r = amdgpu_syncobj_lookup_and_add_to_sync(p, + syncobj_deps[i].handle, + syncobj_deps[i].point, + syncobj_deps[i].flags); + if (r) + return r; + } + + return 0; +} + +static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_sem *deps; + unsigned num_deps; + int i; + + deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_sem); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + + for (i = 0; i < num_deps; ++i) { + p->post_deps[i].syncobj = + drm_syncobj_find(p->filp, deps[i].handle); + if (!p->post_deps[i].syncobj) + return -EINVAL; + p->post_deps[i].chain = NULL; + p->post_deps[i].point = 0; + p->num_post_deps++; + } + + return 0; +} + + +static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, + struct amdgpu_cs_chunk *chunk) +{ + struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; + unsigned num_deps; + int i; + + syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; + num_deps = chunk->length_dw * 4 / + sizeof(struct drm_amdgpu_cs_chunk_syncobj); + + if (p->post_deps) + return -EINVAL; + + p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), + GFP_KERNEL); + p->num_post_deps = 0; + + if (!p->post_deps) + return -ENOMEM; + + for (i = 0; i < num_deps; ++i) { + struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; + + dep->chain = NULL; + if (syncobj_deps[i].point) { + dep->chain = dma_fence_chain_alloc(); + if (!dep->chain) + return -ENOMEM; + } + + dep->syncobj = drm_syncobj_find(p->filp, + syncobj_deps[i].handle); + if (!dep->syncobj) { + dma_fence_chain_free(dep->chain); + return -EINVAL; + } + dep->point = syncobj_deps[i].point; + p->num_post_deps++; + } + + return 0; +} + +static int amdgpu_cs_dependencies(struct amdgpu_device *adev, + struct amdgpu_cs_parser *p) +{ + int i, r; + + for (i = 0; i < p->nchunks; ++i) { + struct amdgpu_cs_chunk *chunk; + + chunk = &p->chunks[i]; + + switch (chunk->chunk_id) { + case AMDGPU_CHUNK_ID_DEPENDENCIES: + case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: + r = amdgpu_cs_process_fence_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_IN: + r = amdgpu_cs_process_syncobj_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: + r = amdgpu_cs_process_syncobj_out_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: + r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); + if (r) + return r; + break; + case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: + r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); + if (r) + return r; + break; + } + } + + return 0; +} + /* Convert microseconds to bytes. */ static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) { @@ -659,25 +976,15 @@ out_free_user_pages: return r; } -static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) +static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) { - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_bo_list_entry *e; - int r; + int i; - list_for_each_entry(e, &p->validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct dma_resv *resv = bo->tbo.base.resv; - enum amdgpu_sync_mode sync_mode; + if (!trace_amdgpu_cs_enabled()) + return; - sync_mode = amdgpu_bo_explicit_sync(bo) ? - AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, - &fpriv->vm); - if (r) - return r; - } - return 0; + for (i = 0; i < parser->job->num_ibs; i++) + trace_amdgpu_cs(parser, i); } static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) @@ -753,8 +1060,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } if (!p->job->vm) - return amdgpu_cs_sync_rings(p); - + return 0; r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) @@ -826,324 +1132,27 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) } } - return amdgpu_cs_sync_rings(p); -} - -static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) -{ - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - int r, ce_preempt = 0, de_preempt = 0; - struct amdgpu_ring *ring; - int i, j; - - for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { - struct amdgpu_cs_chunk *chunk; - struct amdgpu_ib *ib; - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct drm_sched_entity *entity; - - chunk = &parser->chunks[i]; - ib = &parser->job->ibs[j]; - chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; - - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) - continue; - - if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && - chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) { - if (chunk_ib->flags & AMDGPU_IB_FLAG_CE) - ce_preempt++; - else - de_preempt++; - - /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ - if (ce_preempt > 1 || de_preempt > 1) - return -EINVAL; - } - - r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &entity); - if (r) - return r; - - if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) - parser->job->preamble_status |= - AMDGPU_PREAMBLE_IB_PRESENT; - - if (parser->entity && parser->entity != entity) - return -EINVAL; - - /* Return if there is no run queue associated with this entity. - * Possibly because of disabled HW IP*/ - if (entity->rq == NULL) - return -EINVAL; - - parser->entity = entity; - - ring = to_amdgpu_ring(entity->rq->sched); - r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, - AMDGPU_IB_POOL_DELAYED, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - ib->gpu_addr = chunk_ib->va_start; - ib->length_dw = chunk_ib->ib_bytes / 4; - ib->flags = chunk_ib->flags; - - j++; - } - - /* MM engine doesn't support user fences */ - ring = to_amdgpu_ring(parser->entity->rq->sched); - if (parser->job->uf_addr && ring->funcs->no_user_fence) - return -EINVAL; - return 0; } -static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) +static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned num_deps; - int i, r; - struct drm_amdgpu_cs_chunk_dep *deps; - - deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_dep); - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_ctx *ctx; - struct drm_sched_entity *entity; - struct dma_fence *fence; - - ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); - if (ctx == NULL) - return -EINVAL; - - r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &entity); - if (r) { - amdgpu_ctx_put(ctx); - return r; - } - - fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); - amdgpu_ctx_put(ctx); - - if (IS_ERR(fence)) - return PTR_ERR(fence); - else if (!fence) - continue; - - if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) { - struct drm_sched_fence *s_fence; - struct dma_fence *old = fence; - - s_fence = to_drm_sched_fence(fence); - fence = dma_fence_get(&s_fence->scheduled); - dma_fence_put(old); - } - - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - if (r) - return r; - } - return 0; -} - -static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, - uint32_t handle, u64 point, - u64 flags) -{ - struct dma_fence *fence; + struct amdgpu_bo_list_entry *e; int r; - r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence); - if (r) { - DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n", - handle, point, r); - return r; - } + list_for_each_entry(e, &p->validated, tv.head) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct dma_resv *resv = bo->tbo.base.resv; + enum amdgpu_sync_mode sync_mode; - r = amdgpu_sync_fence(&p->job->sync, fence); - dma_fence_put(fence); - - return r; -} - -static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i, r; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle, - 0, 0); + sync_mode = amdgpu_bo_explicit_sync(bo) ? + AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, + &fpriv->vm); if (r) return r; } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i, r; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - for (i = 0; i < num_deps; ++i) { - r = amdgpu_syncobj_lookup_and_add_to_sync(p, - syncobj_deps[i].handle, - syncobj_deps[i].point, - syncobj_deps[i].flags); - if (r) - return r; - } - - return 0; -} - -static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_sem *deps; - unsigned num_deps; - int i; - - deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_sem); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - - for (i = 0; i < num_deps; ++i) { - p->post_deps[i].syncobj = - drm_syncobj_find(p->filp, deps[i].handle); - if (!p->post_deps[i].syncobj) - return -EINVAL; - p->post_deps[i].chain = NULL; - p->post_deps[i].point = 0; - p->num_post_deps++; - } - - return 0; -} - - -static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk) -{ - struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps; - unsigned num_deps; - int i; - - syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata; - num_deps = chunk->length_dw * 4 / - sizeof(struct drm_amdgpu_cs_chunk_syncobj); - - if (p->post_deps) - return -EINVAL; - - p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps), - GFP_KERNEL); - p->num_post_deps = 0; - - if (!p->post_deps) - return -ENOMEM; - - for (i = 0; i < num_deps; ++i) { - struct amdgpu_cs_post_dep *dep = &p->post_deps[i]; - - dep->chain = NULL; - if (syncobj_deps[i].point) { - dep->chain = dma_fence_chain_alloc(); - if (!dep->chain) - return -ENOMEM; - } - - dep->syncobj = drm_syncobj_find(p->filp, - syncobj_deps[i].handle); - if (!dep->syncobj) { - dma_fence_chain_free(dep->chain); - return -EINVAL; - } - dep->point = syncobj_deps[i].point; - p->num_post_deps++; - } - - return 0; -} - -static int amdgpu_cs_dependencies(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - int i, r; - - for (i = 0; i < p->nchunks; ++i) { - struct amdgpu_cs_chunk *chunk; - - chunk = &p->chunks[i]; - - switch (chunk->chunk_id) { - case AMDGPU_CHUNK_ID_DEPENDENCIES: - case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: - r = amdgpu_cs_process_fence_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_IN: - r = amdgpu_cs_process_syncobj_in_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: - r = amdgpu_cs_process_syncobj_out_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: - r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); - if (r) - return r; - break; - case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: - r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); - if (r) - return r; - break; - } - } - return 0; } @@ -1243,17 +1252,6 @@ error_unlock: return r; } -static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser) -{ - int i; - - if (!trace_amdgpu_cs_enabled()) - return; - - for (i = 0; i < parser->job->num_ibs; i++) - trace_amdgpu_cs(parser, i); -} - /* Cleanup the parser structure */ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff) @@ -1342,6 +1340,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; + r = amdgpu_cs_sync_rings(&parser); + if (r) + goto out; + r = amdgpu_cs_submit(&parser, data); out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers);