drm/amdgpu: rework dma_resv handling v3
Drop the workaround and instead implement a better solution.

Basically we are now chaining all submissions using a dma_fence_chain container and adding them as exclusive fence to the dma_resv object. This way other drivers can still sync to the single exclusive fence while amdgpu only syncs to fences from different processes.

v3: add the shared fence first before the exclusive one

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210614174536.5188-2-christian.koenig@amd.com
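For orientation before the diff, here is a condensed sketch of the new scheme. This is not code from the patch itself but a hypothetical helper (the name attach_submission_fence and its parameters are illustrative only), built from the amdgpu_cs_submit() hunk further down; it assumes the reservation object is locked, a shared fence slot has already been reserved, and the chain node was allocated beforehand with dma_fence_chain_alloc():

	#include <linux/dma-fence-chain.h>
	#include <linux/dma-resv.h>

	/* Hypothetical illustration only, not part of the patch. */
	static void attach_submission_fence(struct dma_resv *resv,
					    struct dma_fence_chain *chain,
					    struct dma_fence *fence)
	{
		/* Chain the previous exclusive fence with this submission; the
		 * chain node signals only once both have signaled. */
		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
				     dma_fence_get(fence), 1);

		/* Add the submission as shared fence first so that no shared
		 * fence can signal before the exclusive one ... */
		dma_resv_add_shared_fence(resv, fence);

		/* ... then install the chain node as the new exclusive fence.
		 * Assigning the pointer directly (instead of calling
		 * dma_resv_add_excl_fence()) keeps the shared fences intact. */
		rcu_assign_pointer(resv->fence_excl, &chain->base);
	}

The error paths in amdgpu_cs_parser_bos() and amdgpu_cs_parser_fini() below simply free any chain node that was allocated but never installed.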
@@ -34,6 +34,7 @@ struct amdgpu_fpriv;
 struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
+	struct dma_fence_chain		*chain;
 	uint32_t			priority;
 	struct page			**user_pages;
 	bool				user_invalidated;
@@ -572,6 +572,20 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto out;
 	}
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+
+		if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
+			e->chain = dma_fence_chain_alloc();
+			if (!e->chain) {
+				r = -ENOMEM;
+				goto error_validate;
+			}
+		}
+	}
+
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
 					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
@@ -599,15 +613,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
-		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
-		/* Make sure we use the exclusive slot for shared BOs */
-		if (bo->prime_shared_count)
-			e->tv.num_shared = 0;
-		e->bo_va = amdgpu_vm_bo_find(vm, bo);
-	}
-
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
 		p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
@@ -629,8 +634,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	}
 
 error_validate:
-	if (r)
+	if (r) {
+		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+	}
 out:
 	return r;
 }
@@ -670,9 +680,17 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 {
 	unsigned i;
 
-	if (error && backoff)
+	if (error && backoff) {
+		struct amdgpu_bo_list_entry *e;
+
+		amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
+			dma_fence_chain_free(e->chain);
+			e->chain = NULL;
+		}
+
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
+	}
 
 	for (i = 0; i < parser->num_post_deps; i++) {
 		drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -1245,6 +1263,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct dma_resv *resv = e->tv.bo->base.resv;
+		struct dma_fence_chain *chain = e->chain;
+
+		if (!chain)
+			continue;
+
+		/*
+		 * Work around dma_resv shortcommings by wrapping up the
+		 * submission in a dma_fence_chain and add it as exclusive
+		 * fence, but first add the submission as shared fence to make
+		 * sure that shared fences never signal before the exclusive
+		 * one.
+		 */
+		dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
+				     dma_fence_get(p->fence), 1);
+
+		dma_resv_add_shared_fence(resv, p->fence);
+		rcu_assign_pointer(resv->fence_excl, &chain->base);
+		e->chain = NULL;
+	}
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	mutex_unlock(&p->adev->notifier_lock);
 
@@ -42,48 +42,6 @@
 #include <linux/pci-p2pdma.h>
 #include <linux/pm_runtime.h>
 
-static int
-__dma_resv_make_exclusive(struct dma_resv *obj)
-{
-	struct dma_fence **fences;
-	unsigned int count;
-	int r;
-
-	if (!dma_resv_shared_list(obj)) /* no shared fences to convert */
-		return 0;
-
-	r = dma_resv_get_fences(obj, NULL, &count, &fences);
-	if (r)
-		return r;
-
-	if (count == 0) {
-		/* Now that was unexpected. */
-	} else if (count == 1) {
-		dma_resv_add_excl_fence(obj, fences[0]);
-		dma_fence_put(fences[0]);
-		kfree(fences);
-	} else {
-		struct dma_fence_array *array;
-
-		array = dma_fence_array_create(count, fences,
-					       dma_fence_context_alloc(1), 0,
-					       false);
-		if (!array)
-			goto err_fences_put;
-
-		dma_resv_add_excl_fence(obj, &array->base);
-		dma_fence_put(&array->base);
-	}
-
-	return 0;
-
-err_fences_put:
-	while (count--)
-		dma_fence_put(fences[count]);
-	kfree(fences);
-	return -ENOMEM;
-}
-
 /**
  * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
  *
@@ -110,24 +68,6 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
 	if (r < 0)
 		goto out;
 
-	r = amdgpu_bo_reserve(bo, false);
-	if (unlikely(r != 0))
-		goto out;
-
-	/*
-	 * We only create shared fences for internal use, but importers
-	 * of the dmabuf rely on exclusive fences for implicitly
-	 * tracking write hazards. As any of the current fences may
-	 * correspond to a write, we need to convert all existing
-	 * fences on the reservation object into a single exclusive
-	 * fence.
-	 */
-	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
-	if (r)
-		goto out;
-
-	bo->prime_shared_count++;
-	amdgpu_bo_unreserve(bo);
 	return 0;
 
 out:
@@ -150,9 +90,6 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
-	if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
-		bo->prime_shared_count--;
-
 	pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 	pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
 }
@@ -406,8 +343,6 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	bo = gem_to_amdgpu_bo(gobj);
 	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
 	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
-	if (dma_buf->ops != &amdgpu_dmabuf_ops)
-		bo->prime_shared_count = 1;
 
 	dma_resv_unlock(resv);
 	return gobj;
@@ -829,7 +829,8 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
-		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+		if (robj->tbo.base.import_attach &&
+		    args->value & AMDGPU_GEM_DOMAIN_VRAM) {
 			r = -EINVAL;
 			amdgpu_bo_unreserve(robj);
 			break;
@@ -892,7 +892,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 		return -EINVAL;
 
 	/* A shared bo cannot be migrated to VRAM */
-	if (bo->prime_shared_count || bo->tbo.base.import_attach) {
+	if (bo->tbo.base.import_attach) {
 		if (domain & AMDGPU_GEM_DOMAIN_GTT)
 			domain = AMDGPU_GEM_DOMAIN_GTT;
 		else
@@ -99,7 +99,6 @@ struct amdgpu_bo {
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
 	u64				flags;
-	unsigned			prime_shared_count;
 	/* per VM structure for page tables and with virtual addresses */
 	struct amdgpu_vm_bo_base	*vm_bo;
 	/* Constant after initialization */